import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
from sklearn.model_selection import train_test_split
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
# Notebook-wide settings: silence every warning and lift pandas' row/column
# display limits so wide frames are printed in full.
warnings.simplefilter('ignore')
for _display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(_display_option, None)
# Dataset Creation
# nhanes target
# Read the target table once and drop its first column by position. The
# previous version parsed the whole CSV a second time only to count the
# columns for `usecols=range(1, n)`; slicing with `.iloc[:, 1:]` keeps the
# same columns in the same order with a single read.
# NOTE(review): the dropped column is presumably a saved DataFrame index -- confirm.
nhanes = pd.read_csv("/Users/kevinnguyen/Downloads/nhanes_base_target_final.csv").iloc[:, 1:].copy()
# demographics
# One DEMO transport file per cycle, read in chronological order.
# NOTE(review): the 1516 file name ends in `_H_1516` while the other 1516 files
# in this script use `_I` -- confirm the file on disk is the intended cycle.
(
    demographics_0506,
    demographics_0708,
    demographics_0910,
    demographics_1112,
    demographics_1314,
    demographics_1516,
) = map(
    pd.read_sas,
    (
        "/Users/kevinnguyen/Downloads/nhanes_0506/nhanes_d_DEMO_D_0506.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_0708/nhanes_d_DEMO_E_0708.xpt",
        "/Users/kevinnguyen/Downloads/nahnes_0910/nhanes_d_DEMO_F_0910.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1112/nhanes_d_DEMO_G_1112.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1314/nhanes_d_DEMO_H_1314.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1516/nhanes_d_DEMO_H_1516.xpt",
    ),
)
# The 0506 file names the income column INDHHINC; align it with the INDHHIN2
# name selected from every cycle below.
demographics_0506.rename(columns={"INDHHINC": "INDHHIN2"}, inplace=True)
# Columns kept from every demographics file.
demographics_colnames = [
    "SEQN",
    "RIAGENDR",
    "RIDAGEYR",
    "RIDRETH1",
    "DMDEDUC2",
    "INDHHIN2",
]
# dietary
# One DR1TOT transport file per cycle, read in chronological order.
(
    dietary_0506,
    dietary_0708,
    dietary_0910,
    dietary_1112,
    dietary_1314,
    dietary_1516,
) = map(
    pd.read_sas,
    (
        "/Users/kevinnguyen/Downloads/nhanes_0506/nhanes_dietary_DR1TOT_D_0506.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_0708/nhanes_dietary_DR1TOT_E_0708.xpt",
        "/Users/kevinnguyen/Downloads/nahnes_0910/nhanes_dietary_DR1TOT_F_0910.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1112/nhanes_dietary_DR1TOT_G_1112.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1314/nhanes_dietary_DR1TOT_H_1314.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1516/nhanes_dietary_DR1TOT_I_1516.xpt",
    ),
)
# Columns kept from every dietary file.
dietary_colnames = [
    "SEQN",
    "DR1TKCAL",
    "DR1TCARB",
    "DR1TSUGR",
    "DR1TTFAT",
    "DR1TFIBE",
    "DR1_320Z",
]
# examinations
# One BMX transport file per cycle, read in chronological order.
# NOTE(review): the 1516 file name ends in `_H_1516` while the other 1516 files
# in this script use `_I` -- confirm the file on disk is the intended cycle.
(
    examination_0506,
    examination_0708,
    examination_0910,
    examination_1112,
    examination_1314,
    examination_1516,
) = map(
    pd.read_sas,
    (
        "/Users/kevinnguyen/Downloads/nhanes_0506/nhanes_e_BMX_D_0506.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_0708/nhanes_e_BMX_E_0708.xpt",
        "/Users/kevinnguyen/Downloads/nahnes_0910/nhanes_e_BMX_F_0910.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1112/nhanes_e_BMX_G_1112.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1314/nhanes_e_BMX_H_1314.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1516/nhanes_e_BMX_H_1516.xpt",
    ),
)
# Columns kept from every examination file.
examination_colnames = ["SEQN", "BMXBMI"]
# laboratory
## metals
# PBCD files, one per cycle.
(
    CdHgPb_0506,
    CdHgPb_0708,
    CdHgPb_0910,
    CdHgPb_1112,
    CdHgPb_1314,
    CdHgPb_1516,
) = map(
    pd.read_sas,
    (
        "/Users/kevinnguyen/Downloads/nhanes_0506/nhanes_l_PBCD_D_0506.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_0708/nhanes_l_PBCD_E_0708.xpt",
        "/Users/kevinnguyen/Downloads/nahnes_0910/nhanes_l_PBCD_F_0910.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1112/nhanes_l_PBCD_G_1112.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1314/nhanes_l_PBCD_H_1314.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1516/nhanes_l_PBCD_I_1516.xpt",
    ),
)
# Arsenic files, one per cycle (named UAS through 1112, UTAS from 1314 on).
(
    arsenic_0506,
    arsenic_0708,
    arsenic_0910,
    arsenic_1112,
    arsenic_1314,
    arsenic_1516,
) = map(
    pd.read_sas,
    (
        "/Users/kevinnguyen/Downloads/nhanes_0506/nhanes_l_UAS_D_0506.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_0708/nhanes_l_UAS_E_0708.xpt",
        "/Users/kevinnguyen/Downloads/nahnes_0910/nhanes_l_UAS_F_0910.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1112/nhanes_l_UAS_G_1112.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1314/nhanes_l_UTAS_H_1314.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1516/nhanes_l_UTAS_I_1516.xpt",
    ),
)
## glucose & insulin
# GLU files, one per cycle.
(
    glu_0506,
    glu_0708,
    glu_0910,
    glu_1112,
    glu_1314,
    glu_1516,
) = map(
    pd.read_sas,
    (
        "/Users/kevinnguyen/Downloads/nhanes_0506/nhanes_l_GLU_D_0506.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_0708/nhanes_l_GLU_E_0708.xpt",
        "/Users/kevinnguyen/Downloads/nahnes_0910/nhanes_l_GLU_F_0910.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1112/nhanes_l_GLU_G_1112.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1314/nhanes_l_GLU_H_1314.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1516/nhanes_l_GLU_I_1516.xpt",
    ),
)
# Separate INS files are loaded only for the 1314 and 1516 cycles.
ins_1314, ins_1516 = map(
    pd.read_sas,
    (
        "/Users/kevinnguyen/Downloads/nhanes_1314/nhanes_l_INS_H_1314.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1516/nhanes_l_INS_I_1516.xpt",
    ),
)
## pfas
# One file per cycle (named PFC through 1112, PFAS from 1314 on).
(
    pfa_0506,
    pfa_0708,
    pfa_0910,
    pfa_1112,
    pfa_1314,
    pfa_1516,
) = map(
    pd.read_sas,
    (
        "/Users/kevinnguyen/Downloads/nhanes_0506/nhanes_l_PFC_D_0506.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_0708/nhanes_l_PFC_E_0708.xpt",
        "/Users/kevinnguyen/Downloads/nahnes_0910/nhanes_l_PFC_F_0910.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1112/nhanes_l_PFC_G_1112.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1314/nhanes_l_PFAS_H_1314.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1516/nhanes_l_PFAS_I_1516.xpt",
    ),
)
## pahs
# One PAH file per cycle.
(
    pah_0506,
    pah_0708,
    pah_0910,
    pah_1112,
    pah_1314,
    pah_1516,
) = map(
    pd.read_sas,
    (
        "/Users/kevinnguyen/Downloads/nhanes_0506/nhanes_l_PAH_D_0506.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_0708/nhanes_l_PAH_E_0708.xpt",
        "/Users/kevinnguyen/Downloads/nahnes_0910/nhanes_l_PAH_F_0910.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1112/nhanes_l_PAH_G_1112.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1314/nhanes_l_PAH_H_1314.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1516/nhanes_l_PAH_I_1516.xpt",
    ),
)
## environmental phenols
# One file per cycle (named EPH through 1112, EPHPP from 1314 on).
(
    eph_0506,
    eph_0708,
    eph_0910,
    eph_1112,
    eph_1314,
    eph_1516,
) = map(
    pd.read_sas,
    (
        "/Users/kevinnguyen/Downloads/nhanes_0506/nhanes_l_EPH_D_0506.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_0708/nhanes_l_EPH_E_0708.xpt",
        "/Users/kevinnguyen/Downloads/nahnes_0910/nhanes_l_EPH_F_0910.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1112/nhanes_l_EPH_G_1112.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1314/nhanes_l_EPHPP_H_1314.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1516/nhanes_l_EPHPP_I_1516.xpt",
    ),
)
## phthalates
# One PHTHTE file per cycle.
(
    phthalate_0506,
    phthalate_0708,
    phthalate_0910,
    phthalate_1112,
    phthalate_1314,
    phthalate_1516,
) = map(
    pd.read_sas,
    (
        "/Users/kevinnguyen/Downloads/nhanes_0506/nhanes_l_PHTHTE_D_0506.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_0708/nhanes_l_PHTHTE_E_0708.xpt",
        "/Users/kevinnguyen/Downloads/nahnes_0910/nhanes_l_PHTHTE_F_0910.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1112/nhanes_l_PHTHTE_G_1112.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1314/nhanes_l_PHTHTE_H_1314.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1516/nhanes_l_PHTHTE_I_1516.xpt",
    ),
)
# combining ins dataset with glu for 1314 and 1516
# For these two cycles LBXIN is taken from the separately loaded INS frames and
# left-joined onto the glucose frames by SEQN, so every glucose row is kept and
# all six glucose frames can later be cut to the same column list.
_insulin_cols = ["SEQN", "LBXIN"]
ins_1314_new = ins_1314[_insulin_cols]
ins_1516_new = ins_1516[_insulin_cols]
glu_1314 = glu_1314.merge(ins_1314_new, how='left', on='SEQN')
glu_1516 = glu_1516.merge(ins_1516_new, how='left', on='SEQN')

# Columns kept from each laboratory file family.
glucose_colnames = ["SEQN", "LBXGLU", "LBXIN"]
CdHgPb_colnames = ["SEQN", "LBXBCD", "LBXBPB", "LBXTHG"]
arsenic_colnames = ["SEQN", "URXUAS"]
pfa_colnames = [
    "SEQN",
    "LBXMPAH", "LBXPFDO", "LBXPFNA", "LBXPFHS", "LBXPFDE", "LBXPFUA",
]
pah_colnames = [
    "SEQN",
    "URXP01", "URXP02", "URXP03", "URXP04", "URXP06",
]
eph_colnames = [
    "SEQN",
    "URXBPH", "URXTRS", "URXBP3", "URXBUP", "URXEPB", "URXMPB", "URXPPB",
]
phthalate_colnames = [
    "SEQN",
    "URXCNP", "URXCOP", "URXECP", "URXMBP", "URXMC1", "URXMEP",
    "URXMHH", "URXMHP", "URXMIB", "URXMNP", "URXMOH", "URXMZP",
]
# questionnaires
## alcohol
(
    alq_0506,
    alq_0708,
    alq_0910,
    alq_1112,
    alq_1314,
    alq_1516,
) = map(
    pd.read_sas,
    (
        "/Users/kevinnguyen/Downloads/nhanes_0506/nhanes_q_ALQ_D_0506.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_0708/nhanes_q_ALQ_E_0708.xpt",
        "/Users/kevinnguyen/Downloads/nahnes_0910/nhanes_q_ALQ_F_0910.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1112/nhanes_q_ALQ_G_1112.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1314/nhanes_q_ALQ_H_1314.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1516/nhanes_q_ALQ_I_1516.xpt",
    ),
)
## physical activity
(
    paq_0506,
    paq_0708,
    paq_0910,
    paq_1112,
    paq_1314,
    paq_1516,
) = map(
    pd.read_sas,
    (
        "/Users/kevinnguyen/Downloads/nhanes_0506/nhanes_q_PAQ_D_0506.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_0708/nhanes_q_PAQ_E_0708.xpt",
        "/Users/kevinnguyen/Downloads/nahnes_0910/nhanes_q_PAQ_F_0910.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1112/nhanes_q_PAQ_G_1112.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1314/nhanes_q_PAQ_H_1314.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1516/nhanes_q_PAQ_I_1516.xpt",
    ),
)
## smoking
(
    smq_0506,
    smq_0708,
    smq_0910,
    smq_1112,
    smq_1314,
    smq_1516,
) = map(
    pd.read_sas,
    (
        "/Users/kevinnguyen/Downloads/nhanes_0506/nhanes_q_SMQ_D_0506.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_0708/nhanes_q_SMQ_E_0708.xpt",
        "/Users/kevinnguyen/Downloads/nahnes_0910/nhanes_q_SMQ_F_0910.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1112/nhanes_q_SMQ_G_1112.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1314/nhanes_q_SMQ_H_1314.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1516/nhanes_q_SMQ_I_1516.xpt",
    ),
)
## mental health
(
    dpq_0506,
    dpq_0708,
    dpq_0910,
    dpq_1112,
    dpq_1314,
    dpq_1516,
) = map(
    pd.read_sas,
    (
        "/Users/kevinnguyen/Downloads/nhanes_0506/nhanes_q_DPQ_D_0506.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_0708/nhanes_q_DPQ_E_0708.xpt",
        "/Users/kevinnguyen/Downloads/nahnes_0910/nhanes_q_DPQ_F_0910.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1112/nhanes_q_DPQ_G_1112.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1314/nhanes_q_DPQ_H_1314.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1516/nhanes_q_DPQ_I_1516.xpt",
    ),
)
## sleep disorders
(
    slq_0506,
    slq_0708,
    slq_0910,
    slq_1112,
    slq_1314,
    slq_1516,
) = map(
    pd.read_sas,
    (
        "/Users/kevinnguyen/Downloads/nhanes_0506/nhanes_q_SLQ_D_0506.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_0708/nhanes_q_SLQ_E_0708.xpt",
        "/Users/kevinnguyen/Downloads/nahnes_0910/nhanes_q_SLQ_F_0910.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1112/nhanes_q_SLQ_G_1112.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1314/nhanes_q_SLQ_H_1314.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1516/nhanes_q_SLQ_I_1516.xpt",
    ),
)
## medical conditions
(
    mcq_0506,
    mcq_0708,
    mcq_0910,
    mcq_1112,
    mcq_1314,
    mcq_1516,
) = map(
    pd.read_sas,
    (
        "/Users/kevinnguyen/Downloads/nhanes_0506/nhanes_q_MCQ_D_0506.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_0708/nhanes_q_MCQ_E_0708.xpt",
        "/Users/kevinnguyen/Downloads/nahnes_0910/nhanes_q_MCQ_F_0910.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1112/nhanes_q_MCQ_G_1112.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1314/nhanes_q_MCQ_H_1314.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1516/nhanes_q_MCQ_I_1516.xpt",
    ),
)
## hospital utilization & access to care
(
    huq_0506,
    huq_0708,
    huq_0910,
    huq_1112,
    huq_1314,
    huq_1516,
) = map(
    pd.read_sas,
    (
        "/Users/kevinnguyen/Downloads/nhanes_0506/nhanes_q_HUQ_D_0506.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_0708/nhanes_q_HUQ_E_0708.xpt",
        "/Users/kevinnguyen/Downloads/nahnes_0910/nhanes_q_HUQ_F_0910.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1112/nhanes_q_HUQ_G_1112.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1314/nhanes_q_HUQ_H_1314.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1516/nhanes_q_HUQ_I_1516.xpt",
    ),
)
## health insurance
(
    hiq_0506,
    hiq_0708,
    hiq_0910,
    hiq_1112,
    hiq_1314,
    hiq_1516,
) = map(
    pd.read_sas,
    (
        "/Users/kevinnguyen/Downloads/nhanes_0506/nhanes_q_HIQ_D_0506.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_0708/nhanes_q_HIQ_E_0708.xpt",
        "/Users/kevinnguyen/Downloads/nahnes_0910/nhanes_q_HIQ_F_0910.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1112/nhanes_q_HIQ_G_1112.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1314/nhanes_q_HIQ_H_1314.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1516/nhanes_q_HIQ_I_1516.xpt",
    ),
)
## housing characteristics
(
    hoq_0506,
    hoq_0708,
    hoq_0910,
    hoq_1112,
    hoq_1314,
    hoq_1516,
) = map(
    pd.read_sas,
    (
        "/Users/kevinnguyen/Downloads/nhanes_0506/nhanes_q_HOQ_D_0506.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_0708/nhanes_q_HOQ_E_0708.xpt",
        "/Users/kevinnguyen/Downloads/nahnes_0910/nhanes_q_HOQ_F_0910.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1112/nhanes_q_HOQ_G_1112.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1314/nhanes_q_HOQ_H_1314.xpt",
        "/Users/kevinnguyen/Downloads/nhanes_1516/nhanes_q_HOQ_I_1516.xpt",
    ),
)
# Columns kept from each questionnaire file family. Where earlier cycles use a
# different name for the selected variable, those frames are renamed in place
# first so a single column list works for all six cycles.
alq_colnames = ["SEQN", "ALQ130"]

# 0506, 0708, 0910 name the "hours watch TV or videos past 30 days" column
# PAD590 --> rename to PAQ710 to match the later cycles.
for _paq_legacy in (paq_0506, paq_0708, paq_0910):
    _paq_legacy.rename(columns={"PAD590": "PAQ710"}, inplace=True)
paq_colnames = ["SEQN", "PAQ710"]

smq_colnames = ["SEQN", "SMQ020"]
dpq_colnames = ["SEQN", "DPQ010"]
slq_colnames = ["SEQN", "SLQ050"]
mcq_colnames = ["SEQN", "MCQ300C"]

# 0506, 0708, 0910, 1112 name the "times received healthcare over past year"
# column HUQ050 --> rename to HUQ051 to match 1516.
for _huq_legacy in (huq_0506, huq_0708, huq_0910, huq_1112):
    _huq_legacy.rename(columns={"HUQ050": "HUQ051"}, inplace=True)
huq_colnames = ["SEQN", "HUQ051"]

hiq_colnames = ["SEQN", "HIQ011"]
hoq_colnames = ["SEQN", "HOQ065"]
# cleaning dataframes
# For each family: cut every cycle's frame down to the shared column list,
# rebind the per-cycle names to the trimmed frames, then stack the six cycles
# row-wise with a fresh index.
## demographics
_demographics_cycles = [
    cycle[demographics_colnames]
    for cycle in (
        demographics_0506, demographics_0708, demographics_0910,
        demographics_1112, demographics_1314, demographics_1516,
    )
]
(
    demographics_0506, demographics_0708, demographics_0910,
    demographics_1112, demographics_1314, demographics_1516,
) = _demographics_cycles
demographics = pd.concat(_demographics_cycles, ignore_index=True)
## dietary
_dietary_cycles = [
    cycle[dietary_colnames]
    for cycle in (
        dietary_0506, dietary_0708, dietary_0910,
        dietary_1112, dietary_1314, dietary_1516,
    )
]
(
    dietary_0506, dietary_0708, dietary_0910,
    dietary_1112, dietary_1314, dietary_1516,
) = _dietary_cycles
dietary = pd.concat(_dietary_cycles, ignore_index=True)
## examinations
_examination_cycles = [
    cycle[examination_colnames]
    for cycle in (
        examination_0506, examination_0708, examination_0910,
        examination_1112, examination_1314, examination_1516,
    )
]
(
    examination_0506, examination_0708, examination_0910,
    examination_1112, examination_1314, examination_1516,
) = _examination_cycles
examinations = pd.concat(_examination_cycles, ignore_index=True)
## metals
# Trim each cycle to the shared columns, rebind the per-cycle names, then
# stack the cycles row-wise with a fresh index.
_CdHgPb_cycles = [
    cycle[CdHgPb_colnames]
    for cycle in (
        CdHgPb_0506, CdHgPb_0708, CdHgPb_0910,
        CdHgPb_1112, CdHgPb_1314, CdHgPb_1516,
    )
]
(
    CdHgPb_0506, CdHgPb_0708, CdHgPb_0910,
    CdHgPb_1112, CdHgPb_1314, CdHgPb_1516,
) = _CdHgPb_cycles
_arsenic_cycles = [
    cycle[arsenic_colnames]
    for cycle in (
        arsenic_0506, arsenic_0708, arsenic_0910,
        arsenic_1112, arsenic_1314, arsenic_1516,
    )
]
(
    arsenic_0506, arsenic_0708, arsenic_0910,
    arsenic_1112, arsenic_1314, arsenic_1516,
) = _arsenic_cycles
CdHgPb = pd.concat(_CdHgPb_cycles, ignore_index=True)
arsenic = pd.concat(_arsenic_cycles, ignore_index=True)
## glucose & insulin
_glucose_cycles = [
    cycle[glucose_colnames]
    for cycle in (glu_0506, glu_0708, glu_0910, glu_1112, glu_1314, glu_1516)
]
glu_0506, glu_0708, glu_0910, glu_1112, glu_1314, glu_1516 = _glucose_cycles
glucose = pd.concat(_glucose_cycles, ignore_index=True)
## pfas
# Trim each cycle to the shared columns, rebind the per-cycle names, then
# stack the cycles row-wise with a fresh index.
_pfa_cycles = [
    cycle[pfa_colnames]
    for cycle in (pfa_0506, pfa_0708, pfa_0910, pfa_1112, pfa_1314, pfa_1516)
]
pfa_0506, pfa_0708, pfa_0910, pfa_1112, pfa_1314, pfa_1516 = _pfa_cycles
pfa = pd.concat(_pfa_cycles, ignore_index=True)
## pahs
_pah_cycles = [
    cycle[pah_colnames]
    for cycle in (pah_0506, pah_0708, pah_0910, pah_1112, pah_1314, pah_1516)
]
pah_0506, pah_0708, pah_0910, pah_1112, pah_1314, pah_1516 = _pah_cycles
pah = pd.concat(_pah_cycles, ignore_index=True)
## environmental phenols
_eph_cycles = [
    cycle[eph_colnames]
    for cycle in (eph_0506, eph_0708, eph_0910, eph_1112, eph_1314, eph_1516)
]
eph_0506, eph_0708, eph_0910, eph_1112, eph_1314, eph_1516 = _eph_cycles
eph = pd.concat(_eph_cycles, ignore_index=True)
## phthalates
_phthalate_cycles = [
    cycle[phthalate_colnames]
    for cycle in (
        phthalate_0506, phthalate_0708, phthalate_0910,
        phthalate_1112, phthalate_1314, phthalate_1516,
    )
]
(
    phthalate_0506, phthalate_0708, phthalate_0910,
    phthalate_1112, phthalate_1314, phthalate_1516,
) = _phthalate_cycles
phthalate = pd.concat(_phthalate_cycles, ignore_index=True)
## alcohol
# Trim each cycle to the shared columns, rebind the per-cycle names, then
# stack the cycles row-wise with a fresh index.
_alq_cycles = [
    cycle[alq_colnames]
    for cycle in (alq_0506, alq_0708, alq_0910, alq_1112, alq_1314, alq_1516)
]
alq_0506, alq_0708, alq_0910, alq_1112, alq_1314, alq_1516 = _alq_cycles
alq = pd.concat(_alq_cycles, ignore_index=True)
## physical activity
_paq_cycles = [
    cycle[paq_colnames]
    for cycle in (paq_0506, paq_0708, paq_0910, paq_1112, paq_1314, paq_1516)
]
paq_0506, paq_0708, paq_0910, paq_1112, paq_1314, paq_1516 = _paq_cycles
paq = pd.concat(_paq_cycles, ignore_index=True)
## smoking
_smq_cycles = [
    cycle[smq_colnames]
    for cycle in (smq_0506, smq_0708, smq_0910, smq_1112, smq_1314, smq_1516)
]
smq_0506, smq_0708, smq_0910, smq_1112, smq_1314, smq_1516 = _smq_cycles
smq = pd.concat(_smq_cycles, ignore_index=True)
## mental health
_dpq_cycles = [
    cycle[dpq_colnames]
    for cycle in (dpq_0506, dpq_0708, dpq_0910, dpq_1112, dpq_1314, dpq_1516)
]
dpq_0506, dpq_0708, dpq_0910, dpq_1112, dpq_1314, dpq_1516 = _dpq_cycles
dpq = pd.concat(_dpq_cycles, ignore_index=True)
## sleep disorders
# Trim each cycle to the shared columns, rebind the per-cycle names, then
# stack the cycles row-wise with a fresh index.
_slq_cycles = [
    cycle[slq_colnames]
    for cycle in (slq_0506, slq_0708, slq_0910, slq_1112, slq_1314, slq_1516)
]
slq_0506, slq_0708, slq_0910, slq_1112, slq_1314, slq_1516 = _slq_cycles
slq = pd.concat(_slq_cycles, ignore_index=True)
## medical conditions
_mcq_cycles = [
    cycle[mcq_colnames]
    for cycle in (mcq_0506, mcq_0708, mcq_0910, mcq_1112, mcq_1314, mcq_1516)
]
mcq_0506, mcq_0708, mcq_0910, mcq_1112, mcq_1314, mcq_1516 = _mcq_cycles
mcq = pd.concat(_mcq_cycles, ignore_index=True)
## hospital utilization & access to care
_huq_cycles = [
    cycle[huq_colnames]
    for cycle in (huq_0506, huq_0708, huq_0910, huq_1112, huq_1314, huq_1516)
]
huq_0506, huq_0708, huq_0910, huq_1112, huq_1314, huq_1516 = _huq_cycles
huq = pd.concat(_huq_cycles, ignore_index=True)
## health insurance
_hiq_cycles = [
    cycle[hiq_colnames]
    for cycle in (hiq_0506, hiq_0708, hiq_0910, hiq_1112, hiq_1314, hiq_1516)
]
hiq_0506, hiq_0708, hiq_0910, hiq_1112, hiq_1314, hiq_1516 = _hiq_cycles
hiq = pd.concat(_hiq_cycles, ignore_index=True)
## housing characteristics
_hoq_cycles = [
    cycle[hoq_colnames]
    for cycle in (hoq_0506, hoq_0708, hoq_0910, hoq_1112, hoq_1314, hoq_1516)
]
hoq_0506, hoq_0708, hoq_0910, hoq_1112, hoq_1314, hoq_1516 = _hoq_cycles
hoq = pd.concat(_hoq_cycles, ignore_index=True)
# merging datasets
# Left-join every component table onto the target table by SEQN, one table at
# a time, so rows of `nhanes` are never dropped when a component has no match
# (unmatched columns become NaN).
dfs = [
    demographics, dietary, examinations, CdHgPb, arsenic, glucose,
    pfa, pah, eph, phthalate,
    alq, paq, smq, dpq, slq, mcq, huq, hiq, hoq,
]
nhanes_merged = nhanes
for df in dfs:
    # The loop body must be indented; it had been flattened to column 0,
    # which is a syntax error.
    nhanes_merged = pd.merge(nhanes_merged, df, on="SEQN", how="left")
# NOTE(review): the displayed rows contain values of 5.397605e-79 in several
# columns (e.g. DR1_320Z, PAQ710, DPQ010, HUQ051); these look like zeros as
# decoded from the transport files -- confirm and normalise before any
# equality checks or categorical encoding.
nhanes_merged.head(100)
| SEQN | diabetes | LBDGLUSI | LBDGLTSI | BPQ020 | BPQ080 | DID060 | RHD143 | RIAGENDR | RIDAGEYR | RIDRETH1 | DMDEDUC2 | INDHHIN2 | DR1TKCAL | DR1TCARB | DR1TSUGR | DR1TTFAT | DR1TFIBE | DR1_320Z | BMXBMI | LBXBCD | LBXBPB | LBXTHG | URXUAS | LBXGLU | LBXIN | LBXMPAH | LBXPFDO | LBXPFNA | LBXPFHS | LBXPFDE | LBXPFUA | URXP01 | URXP02 | URXP03 | URXP04 | URXP06 | URXBPH | URXTRS | URXBP3 | URXBUP | URXEPB | URXMPB | URXPPB | URXCNP | URXCOP | URXECP | URXMBP | URXMC1 | URXMEP | URXMHH | URXMHP | URXMIB | URXMNP | URXMOH | URXMZP | ALQ130 | PAQ710 | SMQ020 | DPQ010 | SLQ050 | MCQ300C | HUQ051 | HIQ011 | HOQ065 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 83733 | 0 | 5.59 | 0.00 | 2.0 | 2.0 | NaN | NaN | 1.0 | 53.0 | 3.0 | 3.0 | 4.0 | 2964.0 | 356.85 | 180.84 | 77.91 | 7.3 | 5.070000e+02 | 30.8 | 3.53 | 2.60 | 3.08 | 6.47 | 101.0 | 17.26 | NaN | NaN | NaN | NaN | NaN | NaN | 26200.0 | 36800.0 | 1950.0 | 2540.0 | 452.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 6.0 | 5.000000e+00 | 1.0 | 1.000000e+00 | 2.0 | 1.0 | 5.397605e-79 | 2.0 | 1.0 |
| 1 | 83736 | 0 | 4.66 | 0.00 | 2.0 | 2.0 | NaN | 2.0 | 2.0 | 42.0 | 4.0 | 4.0 | 7.0 | 604.0 | 90.30 | 71.84 | 19.63 | 2.0 | 5.397605e-79 | 20.3 | NaN | NaN | NaN | NaN | 84.0 | 5.42 | 0.07 | 0.07 | 0.5 | 0.60 | 0.10 | 0.20 | NaN | NaN | NaN | NaN | NaN | 1.30 | 28.8 | 12.10 | 0.07 | 20.10 | 79.0 | 64.5 | 2.2 | 5.6 | 14.1 | 56.2 | 2.00 | 244.7 | 8.7 | 2.00 | 73.7 | 0.64 | 7.8 | 63.60 | 1.0 | 4.000000e+00 | 2.0 | 1.000000e+00 | 1.0 | 9.0 | 2.000000e+00 | 1.0 | 2.0 |
| 2 | 83737 | 0 | 5.93 | 5.50 | 2.0 | 2.0 | NaN | NaN | 2.0 | 72.0 | 1.0 | 2.0 | 14.0 | 1304.0 | 153.43 | 22.31 | 43.08 | 16.1 | 7.182000e+02 | 28.6 | NaN | NaN | NaN | NaN | 107.0 | 8.24 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 5.397605e-79 | 2.0 | 5.397605e-79 | 2.0 | 1.0 | 2.000000e+00 | 2.0 | 1.0 |
| 3 | 83741 | 0 | 5.27 | 5.27 | 2.0 | 2.0 | NaN | NaN | 1.0 | 22.0 | 4.0 | 4.0 | 7.0 | 2338.0 | 282.58 | 167.72 | 91.07 | 11.0 | 7.200000e+02 | 28.0 | 0.20 | 0.72 | 1.38 | 6.11 | 95.0 | 11.39 | NaN | NaN | NaN | NaN | NaN | NaN | 1400.0 | 2020.0 | 83.0 | 81.0 | 41.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 8.0 | 1.000000e+00 | 1.0 | 5.397605e-79 | 2.0 | 1.0 | 2.000000e+00 | 2.0 | 2.0 |
| 4 | 83743 | 0 | 5.38 | 0.00 | 2.0 | 2.0 | NaN | NaN | 1.0 | 18.0 | 5.0 | NaN | 15.0 | NaN | NaN | NaN | NaN | NaN | NaN | 26.2 | 0.12 | 0.61 | 4.30 | 8.90 | 97.0 | 11.40 | NaN | NaN | NaN | NaN | NaN | NaN | 777.0 | 772.0 | 20.0 | 34.0 | 43.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.000000e+00 | 2.0 | NaN | 2.0 | NaN | 2.000000e+00 | 1.0 | 1.0 |
| 5 | 83749 | 0 | 4.88 | 5.94 | 2.0 | 2.0 | NaN | NaN | 2.0 | 17.0 | 3.0 | NaN | 14.0 | 2461.0 | 289.33 | 177.07 | 100.19 | 7.3 | 2.400000e+02 | 29.0 | NaN | NaN | NaN | NaN | 88.0 | 16.00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 3.000000e+00 | NaN | NaN | 1.0 | NaN | 2.000000e+00 | 1.0 | 1.0 |
| 6 | 83753 | 0 | 5.71 | 0.00 | NaN | NaN | NaN | NaN | 1.0 | 15.0 | 4.0 | NaN | 8.0 | 2277.0 | 272.01 | 145.02 | 85.91 | 10.7 | 1.974000e+03 | 24.5 | 0.11 | 0.47 | 0.37 | 15.59 | 103.0 | 5.43 | NaN | NaN | NaN | NaN | NaN | NaN | 957.0 | 16700.0 | 208.0 | 525.0 | 191.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.000000e+00 | NaN | NaN | NaN | NaN | 2.000000e+00 | 1.0 | 1.0 |
| 7 | 83761 | 0 | 5.27 | 9.10 | 2.0 | 2.0 | NaN | NaN | 2.0 | 24.0 | 5.0 | 5.0 | 1.0 | 2055.0 | 234.44 | 122.15 | 94.82 | 11.5 | 8.700000e+02 | 25.3 | 1.11 | 1.80 | 3.22 | 32.34 | 95.0 | 13.23 | NaN | NaN | NaN | NaN | NaN | NaN | 368.0 | 7260.0 | 25.0 | 87.0 | 109.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | 8.000000e+00 | 2.0 | 5.397605e-79 | 2.0 | 2.0 | 5.397605e-79 | 9.0 | 2.0 |
| 8 | 83770 | 0 | 5.50 | 6.44 | NaN | NaN | NaN | NaN | 1.0 | 15.0 | 4.0 | NaN | 4.0 | 885.0 | 179.37 | 106.99 | 17.64 | 9.5 | 2.535000e+02 | 18.3 | NaN | NaN | NaN | NaN | 99.0 | 4.33 | 0.07 | 0.07 | 0.4 | 0.40 | 0.20 | 0.10 | NaN | NaN | NaN | NaN | NaN | 0.80 | 2.2 | 3.10 | 0.07 | 0.71 | 3.4 | 1.0 | 2.4 | 29.4 | 5.6 | 5.5 | 1.70 | 12.1 | 3.0 | 0.57 | 12.5 | 1.10 | 2.1 | 5.30 | NaN | 2.000000e+00 | NaN | NaN | NaN | NaN | 1.000000e+00 | 1.0 | 1.0 |
| 9 | 83778 | 0 | 5.54 | 4.44 | 2.0 | 2.0 | NaN | NaN | 1.0 | 16.0 | 2.0 | NaN | 5.0 | 3335.0 | 294.47 | 160.73 | 169.00 | 8.8 | 3.450000e+02 | 34.5 | NaN | NaN | NaN | NaN | 100.0 | 28.46 | 0.07 | 0.07 | 0.5 | 0.40 | 0.07 | 0.07 | NaN | NaN | NaN | NaN | NaN | 0.70 | 2.5 | 30.70 | 0.07 | 0.71 | 16.0 | 1.6 | 1.1 | 6.8 | 5.3 | 8.3 | 1.50 | 93.0 | 5.9 | 2.00 | 14.5 | 1.00 | 2.9 | 6.90 | NaN | 3.000000e+00 | NaN | NaN | 2.0 | NaN | 3.000000e+00 | 1.0 | 1.0 |
| 10 | 83781 | 0 | 5.38 | 6.77 | 2.0 | 2.0 | NaN | NaN | 2.0 | 27.0 | 4.0 | 5.0 | 77.0 | 2802.0 | 341.58 | 154.88 | 107.08 | 22.8 | 2.028900e+03 | 34.0 | NaN | NaN | NaN | NaN | 97.0 | 13.97 | 0.07 | 0.07 | 1.1 | 0.70 | 0.30 | 0.30 | NaN | NaN | NaN | NaN | NaN | 0.60 | 297.0 | 4.90 | 2.30 | 2.40 | 55.2 | 12.5 | 1.1 | 2.1 | 4.0 | 17.5 | 0.60 | 84.3 | 2.7 | 2.10 | 14.7 | 0.64 | 1.9 | 17.30 | 3.0 | 5.000000e+00 | 2.0 | 5.397605e-79 | 2.0 | 1.0 | 2.000000e+00 | 1.0 | 2.0 |
| 11 | 83790 | 1 | 22.10 | 30.10 | 2.0 | 2.0 | NaN | NaN | 1.0 | 56.0 | 3.0 | 1.0 | 4.0 | 7455.0 | 1177.49 | 885.92 | 229.37 | 19.8 | 2.394000e+03 | 24.4 | NaN | NaN | NaN | NaN | 397.0 | 4.36 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 4.000000e+00 | 1.0 | 2.000000e+00 | 2.0 | 1.0 | 5.397605e-79 | 1.0 | 1.0 |
| 12 | 83799 | 0 | 5.55 | 0.00 | 2.0 | 2.0 | NaN | NaN | 2.0 | 37.0 | 2.0 | 4.0 | 14.0 | NaN | NaN | NaN | NaN | NaN | NaN | 25.5 | 0.40 | 1.01 | 1.28 | NaN | 100.0 | 4.67 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 8.000000e+00 | 2.0 | NaN | 2.0 | 2.0 | 1.000000e+00 | 1.0 | 2.0 |
| 13 | 83809 | 0 | 5.22 | 6.77 | 2.0 | 2.0 | NaN | NaN | 2.0 | 20.0 | 4.0 | 3.0 | 14.0 | 1445.0 | 62.87 | 13.13 | 95.92 | 4.4 | 8.700000e+02 | 26.2 | 0.10 | 0.21 | 0.28 | NaN | 94.0 | 7.19 | 0.07 | 0.07 | 0.5 | 0.70 | 0.10 | 0.07 | NaN | NaN | NaN | NaN | NaN | 0.50 | 16.8 | 0.28 | 3.70 | 119.10 | 329.8 | 34.0 | 0.3 | 6.9 | 1.8 | 1.0 | 1.20 | 105.7 | 1.1 | 0.57 | 2.3 | 0.64 | 0.7 | 1.00 | NaN | 3.000000e+00 | 2.0 | 5.397605e-79 | 2.0 | 1.0 | 2.000000e+00 | 2.0 | 2.0 |
| 14 | 83813 | 0 | 5.83 | 3.44 | 2.0 | 2.0 | NaN | NaN | 1.0 | 24.0 | 3.0 | 4.0 | 6.0 | 2585.0 | 301.16 | 168.98 | 104.30 | 9.7 | 3.300000e+03 | 26.9 | 1.86 | 1.13 | 2.62 | 1.10 | 105.0 | 12.15 | NaN | NaN | NaN | NaN | NaN | NaN | 5320.0 | 4010.0 | 223.0 | 219.0 | 43.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.0 | 5.397605e-79 | 1.0 | 5.397605e-79 | 2.0 | 2.0 | 2.000000e+00 | 1.0 | 2.0 |
| 15 | 83815 | 0 | 4.94 | 6.61 | NaN | NaN | NaN | NaN | 2.0 | 15.0 | 4.0 | NaN | 6.0 | 1506.0 | 210.45 | 102.58 | 51.38 | 9.9 | 8.700000e+02 | 32.0 | 0.07 | 0.31 | 0.36 | 2.52 | 89.0 | 31.22 | NaN | NaN | NaN | NaN | NaN | NaN | 193.0 | 1090.0 | 26.0 | 57.0 | 36.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 5.000000e+00 | NaN | NaN | NaN | NaN | 5.397605e-79 | 1.0 | 2.0 |
| 16 | 83816 | 0 | 5.11 | 4.00 | 2.0 | 2.0 | NaN | NaN | 1.0 | 27.0 | 3.0 | 4.0 | 7.0 | 2425.0 | 300.18 | 133.75 | 82.43 | 18.2 | 7.500000e+02 | 18.6 | 0.16 | 0.57 | 0.94 | 11.17 | 92.0 | 5.03 | NaN | NaN | NaN | NaN | NaN | NaN | 7030.0 | 17400.0 | 824.0 | 816.0 | 245.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.0 | 2.000000e+00 | 2.0 | 1.000000e+00 | 1.0 | 2.0 | 5.397605e-79 | 1.0 | 2.0 |
| 17 | 83822 | 0 | 4.44 | 4.66 | 2.0 | 2.0 | NaN | NaN | 2.0 | 20.0 | 4.0 | 4.0 | 6.0 | 833.0 | 112.45 | 59.85 | 21.30 | 2.9 | 4.635000e+02 | 22.2 | 0.35 | 0.40 | 0.58 | 3.44 | 80.0 | 11.76 | NaN | NaN | NaN | NaN | NaN | NaN | 846.0 | 9800.0 | 52.0 | 137.0 | 73.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 3.000000e+00 | 2.0 | 1.000000e+00 | 2.0 | 2.0 | 1.000000e+00 | 2.0 | 2.0 |
| 18 | 83823 | 0 | 5.66 | 0.00 | 2.0 | 2.0 | NaN | NaN | 2.0 | 29.0 | 1.0 | 1.0 | 3.0 | NaN | NaN | NaN | NaN | NaN | NaN | 29.7 | 0.27 | 7.30 | 0.67 | NaN | 102.0 | 19.77 | 0.20 | 0.07 | 0.5 | 0.40 | 0.20 | 0.07 | NaN | NaN | NaN | NaN | NaN | 0.40 | 1.2 | 2.60 | 0.07 | 0.71 | 133.2 | 51.3 | 0.7 | 9.3 | 2.9 | 9.3 | 0.28 | 10.9 | 1.3 | 0.57 | 7.6 | 0.64 | 1.0 | 30.80 | NaN | 1.000000e+00 | 2.0 | NaN | 2.0 | 2.0 | 1.000000e+00 | 2.0 | 2.0 |
| 19 | 83825 | 0 | 5.61 | 5.55 | 2.0 | 2.0 | NaN | NaN | 2.0 | 16.0 | 4.0 | NaN | 6.0 | 1009.0 | 159.10 | 77.54 | 32.04 | 4.2 | 3.600000e+02 | 21.6 | NaN | NaN | NaN | NaN | 101.0 | 9.15 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 5.000000e+00 | NaN | NaN | 2.0 | NaN | 5.397605e-79 | 1.0 | 1.0 |
| 20 | 83828 | 0 | 5.61 | 8.22 | 2.0 | 2.0 | NaN | 2.0 | 2.0 | 39.0 | 1.0 | 3.0 | 4.0 | 2068.0 | 256.01 | 98.96 | 87.27 | 14.6 | 1.800000e+03 | 27.2 | NaN | NaN | NaN | NaN | 101.0 | 8.84 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.0 | 5.397605e-79 | 2.0 | 1.000000e+00 | 2.0 | 1.0 | 5.397605e-79 | 2.0 | 1.0 |
| 21 | 83830 | 0 | 5.33 | 6.22 | NaN | NaN | NaN | NaN | 1.0 | 15.0 | 4.0 | NaN | 8.0 | 484.0 | 71.78 | 19.59 | 18.16 | 2.9 | 1.245000e+03 | 25.1 | NaN | NaN | NaN | NaN | 96.0 | 4.30 | 0.10 | 0.07 | 0.5 | 0.60 | 0.10 | 0.07 | NaN | NaN | NaN | NaN | NaN | 1.60 | 4.2 | 16.60 | 0.07 | 0.71 | 18.4 | 1.9 | 1.5 | 3.0 | 4.5 | 9.7 | 0.28 | 17.6 | 3.3 | 0.57 | 7.5 | 0.64 | 1.9 | 6.50 | NaN | 1.000000e+00 | NaN | NaN | NaN | NaN | 1.000000e+00 | 1.0 | 1.0 |
| 22 | 83831 | 0 | 5.05 | 0.00 | NaN | NaN | NaN | NaN | 2.0 | 15.0 | 5.0 | NaN | 6.0 | 1169.0 | 117.55 | 60.58 | 40.17 | 3.8 | 1.500000e+02 | 30.5 | 0.07 | 0.40 | 0.20 | 2.65 | 91.0 | 38.70 | NaN | NaN | NaN | NaN | NaN | NaN | 819.0 | 1820.0 | 126.0 | 248.0 | 144.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 5.397605e-79 | NaN | NaN | NaN | NaN | 2.000000e+00 | 1.0 | 2.0 |
| 23 | 83832 | 0 | 5.77 | 7.44 | 2.0 | 2.0 | NaN | NaN | 2.0 | 50.0 | 1.0 | 1.0 | 7.0 | 2852.0 | 347.64 | 133.32 | 110.36 | 27.3 | 9.150000e+02 | 42.6 | 0.25 | 1.34 | 2.93 | 32.39 | 104.0 | 20.46 | NaN | NaN | NaN | NaN | NaN | NaN | 1990.0 | 18400.0 | 60.0 | 195.0 | 102.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 4.0 | 8.000000e+00 | 2.0 | 5.397605e-79 | 2.0 | 1.0 | 1.000000e+00 | 2.0 | 2.0 |
| 24 | 83833 | 0 | 5.16 | 4.55 | NaN | NaN | NaN | NaN | 1.0 | 14.0 | 3.0 | NaN | 10.0 | 2579.0 | 354.77 | 149.74 | 90.66 | 18.3 | 5.397605e-79 | 17.7 | 0.07 | 0.69 | 0.20 | NaN | 93.0 | 9.65 | 0.07 | 0.07 | 0.7 | 1.30 | 0.20 | 0.07 | NaN | NaN | NaN | NaN | NaN | 2.60 | 6.3 | 84.30 | 0.07 | 0.71 | 5.7 | 0.4 | 3.3 | 4.4 | 11.1 | 12.8 | 0.40 | 34.5 | 5.7 | 0.90 | 26.5 | 0.64 | 4.5 | 2.60 | NaN | 3.000000e+00 | NaN | NaN | NaN | NaN | 1.000000e+00 | 1.0 | 1.0 |
| 25 | 83835 | 1 | 5.22 | 11.50 | NaN | NaN | NaN | NaN | 2.0 | 13.0 | 4.0 | NaN | 2.0 | 1099.0 | 144.23 | 45.45 | 43.82 | 6.6 | 5.070000e+02 | 29.8 | NaN | NaN | NaN | NaN | 94.0 | 82.68 | 0.20 | 0.07 | 1.6 | 0.50 | 2.20 | 1.70 | NaN | NaN | NaN | NaN | NaN | 1.00 | 24.5 | 2.80 | 0.07 | 1.90 | 154.9 | 11.0 | 1.5 | 7.3 | 5.4 | 18.9 | 0.90 | 23.4 | 4.9 | 0.57 | 17.5 | 0.64 | 3.3 | 17.00 | NaN | 5.397605e-79 | NaN | NaN | NaN | NaN | 2.000000e+00 | 1.0 | 2.0 |
| 26 | 83836 | 0 | 5.83 | 6.27 | 2.0 | 2.0 | NaN | NaN | 2.0 | 18.0 | 1.0 | NaN | 14.0 | 1365.0 | 182.55 | 123.90 | 54.34 | 2.2 | 1.500000e+02 | 28.0 | 0.31 | 0.65 | 0.89 | 6.34 | 105.0 | 18.53 | NaN | NaN | NaN | NaN | NaN | NaN | 1180.0 | 13300.0 | 193.0 | 299.0 | 60.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 4.0 | 3.000000e+00 | 2.0 | 1.000000e+00 | 2.0 | NaN | 3.000000e+00 | 1.0 | 1.0 |
| 27 | 83841 | 0 | 6.00 | 0.00 | NaN | NaN | NaN | NaN | 1.0 | 13.0 | 5.0 | NaN | 15.0 | 3612.0 | 501.32 | 91.85 | 113.38 | 34.0 | 9.750000e+02 | 20.6 | 0.12 | 0.52 | 2.41 | NaN | 108.0 | 14.37 | 0.07 | 0.07 | 0.6 | 1.30 | 0.20 | 0.20 | NaN | NaN | NaN | NaN | NaN | 5.40 | 1.2 | 15.60 | 0.20 | 0.71 | 6.7 | 1.1 | 4.7 | 17.4 | 15.6 | 19.7 | 1.50 | 58.1 | 6.8 | 1.10 | 14.3 | 0.64 | 4.5 | 5.50 | NaN | NaN | NaN | NaN | NaN | NaN | 1.000000e+00 | 1.0 | 2.0 |
| 28 | 83844 | 0 | 5.77 | 6.00 | 2.0 | 2.0 | NaN | NaN | 1.0 | 27.0 | 1.0 | 2.0 | 7.0 | 1308.0 | 189.51 | 39.27 | 39.40 | 23.6 | 1.014000e+03 | 23.1 | 0.32 | 0.92 | 1.19 | 26.74 | 104.0 | 9.93 | NaN | NaN | NaN | NaN | NaN | NaN | 340.0 | 5360.0 | 77.0 | 152.0 | 67.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 12.0 | 2.000000e+00 | 2.0 | 1.000000e+00 | 1.0 | 2.0 | 4.000000e+00 | 2.0 | 2.0 |
| 29 | 83847 | 0 | 5.50 | 4.05 | 2.0 | 2.0 | NaN | NaN | 1.0 | 18.0 | 3.0 | NaN | 15.0 | 3479.0 | 410.94 | 176.61 | 146.89 | 18.6 | 3.750000e+02 | 20.8 | NaN | NaN | NaN | NaN | 99.0 | 7.15 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.000000e+00 | 2.0 | 5.397605e-79 | 2.0 | NaN | 4.000000e+00 | 1.0 | 1.0 |
| 30 | 83851 | 0 | 4.39 | 5.22 | 2.0 | 2.0 | NaN | NaN | 2.0 | 37.0 | 3.0 | 3.0 | 8.0 | 2079.0 | 253.72 | 119.36 | 86.20 | 16.8 | 5.397605e-79 | 35.3 | 0.67 | 0.43 | 0.71 | 2.81 | 79.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 9930.0 | 9770.0 | 890.0 | 1090.0 | 166.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | 1.000000e+00 | 1.0 | 5.397605e-79 | 2.0 | 2.0 | 2.000000e+00 | 1.0 | 2.0 |
| 31 | 83854 | 0 | 5.59 | 6.61 | 2.0 | 2.0 | NaN | NaN | 2.0 | 46.0 | 1.0 | 5.0 | 9.0 | 3127.0 | 299.12 | 141.47 | 175.33 | 19.6 | 6.652500e+02 | 41.6 | 0.07 | 0.50 | 0.65 | NaN | 101.0 | 33.07 | 0.30 | 0.07 | 0.7 | 0.50 | 0.30 | 0.07 | NaN | NaN | NaN | NaN | NaN | 1.20 | 18.0 | 166.50 | 0.07 | 0.71 | 648.3 | 163.6 | 3.2 | 59.8 | 28.4 | 14.4 | 3.10 | 91.8 | 16.8 | 1.60 | 13.8 | 1.20 | 10.6 | 2.20 | 1.0 | 2.000000e+00 | 2.0 | 5.397605e-79 | 1.0 | 2.0 | 3.000000e+00 | 1.0 | 1.0 |
| 32 | 83855 | 0 | 5.33 | 4.94 | NaN | NaN | NaN | NaN | 2.0 | 12.0 | 3.0 | NaN | 7.0 | 1667.0 | 195.55 | 84.74 | 76.57 | 14.1 | 7.800000e+02 | 16.2 | 0.07 | 0.27 | 0.82 | NaN | 96.0 | 7.56 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 5.397605e-79 | NaN | NaN | NaN | NaN | 2.000000e+00 | 1.0 | 1.0 |
| 33 | 83860 | 0 | 6.11 | 6.94 | 2.0 | 2.0 | NaN | NaN | 1.0 | 41.0 | 4.0 | 4.0 | 15.0 | 1707.0 | 172.89 | 60.01 | 77.86 | 8.9 | 2.286000e+03 | 40.7 | 0.72 | 0.85 | 1.38 | NaN | 110.0 | 68.80 | 0.07 | 0.07 | 0.6 | 2.50 | 0.10 | 0.07 | NaN | NaN | NaN | NaN | NaN | 0.60 | 3.9 | 4.60 | 0.07 | 3.30 | 155.7 | 24.6 | 1.0 | 4.4 | 12.7 | 5.4 | 1.70 | 425.8 | 11.8 | 3.90 | 3.9 | 0.64 | 6.5 | 1.30 | 3.0 | 2.000000e+00 | 1.0 | 5.397605e-79 | 2.0 | 2.0 | 2.000000e+00 | 1.0 | 1.0 |
| 34 | 83862 | 0 | 5.33 | 0.00 | 2.0 | 2.0 | NaN | NaN | 2.0 | 19.0 | 2.0 | NaN | 99.0 | NaN | NaN | NaN | NaN | NaN | NaN | 24.4 | NaN | NaN | NaN | NaN | 96.0 | 14.47 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | 2.000000e+00 | 2.0 | 5.397605e-79 | 2.0 | NaN | 5.397605e-79 | 1.0 | 1.0 |
| 35 | 83863 | 0 | 5.38 | 3.61 | 2.0 | 2.0 | NaN | NaN | 1.0 | 35.0 | 1.0 | 3.0 | 14.0 | 2458.0 | 262.22 | 84.27 | 113.90 | 17.6 | 1.014000e+03 | 31.1 | 0.24 | 0.86 | 0.94 | NaN | 97.0 | 22.23 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 3.0 | 1.000000e+00 | 2.0 | 3.000000e+00 | 2.0 | 1.0 | 5.397605e-79 | 2.0 | 1.0 |
| 36 | 83866 | 0 | 5.00 | 0.00 | 2.0 | 2.0 | NaN | NaN | 1.0 | 40.0 | 4.0 | 4.0 | 14.0 | 1977.0 | 162.04 | 47.63 | 86.01 | 8.7 | 1.005000e+03 | 30.7 | 0.50 | 1.61 | 2.03 | NaN | 90.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 8.40 | 1.2 | 47.10 | 0.07 | 0.71 | 13.8 | 5.0 | 10.6 | 13.4 | 46.0 | 18.3 | 2.60 | 303.1 | 45.4 | 5.60 | 26.4 | 0.64 | 29.2 | 8.00 | 2.0 | 2.000000e+00 | 1.0 | 5.397605e-79 | 2.0 | 2.0 | 1.000000e+00 | 2.0 | 1.0 |
| 37 | 83874 | 0 | 5.50 | 0.00 | 2.0 | 2.0 | NaN | NaN | 1.0 | 54.0 | 1.0 | 2.0 | 5.0 | NaN | NaN | NaN | NaN | NaN | NaN | 30.2 | NaN | NaN | NaN | NaN | 99.0 | 17.13 | 0.07 | 0.07 | 1.0 | 1.20 | 0.20 | 0.10 | NaN | NaN | NaN | NaN | NaN | 0.60 | 4.5 | 27.10 | 0.20 | 4.20 | 77.8 | 10.2 | 8.1 | 6.6 | 3.8 | 1.5 | 0.50 | 15.0 | 1.1 | 0.57 | 3.3 | 0.64 | 0.7 | 2.40 | NaN | 2.000000e+00 | 1.0 | NaN | 2.0 | 2.0 | 3.000000e+00 | 1.0 | 2.0 |
| 38 | 83887 | 0 | 5.27 | 6.77 | 2.0 | 2.0 | NaN | NaN | 2.0 | 51.0 | 4.0 | 2.0 | 99.0 | 1277.0 | 243.30 | 72.63 | 17.44 | 11.7 | 1.014000e+03 | 23.1 | NaN | NaN | NaN | NaN | 95.0 | 8.67 | 0.10 | 0.07 | 0.5 | 1.70 | 0.20 | 0.40 | NaN | NaN | NaN | NaN | NaN | 0.20 | 227.9 | 2.70 | 0.07 | 0.71 | 3.8 | 0.9 | 1.0 | 3.6 | 2.0 | 2.2 | 0.28 | 12.5 | 0.6 | 0.57 | 2.6 | 0.64 | 0.4 | 1.90 | NaN | 2.000000e+00 | 2.0 | 1.000000e+00 | 2.0 | 2.0 | 3.000000e+00 | 1.0 | 2.0 |
| 39 | 83894 | 0 | 5.50 | 8.99 | 2.0 | 2.0 | NaN | NaN | 1.0 | 60.0 | 4.0 | 3.0 | 8.0 | 1247.0 | 127.50 | 61.63 | 19.28 | 5.4 | 3.900000e+02 | 19.7 | 1.11 | 4.33 | 1.37 | NaN | 99.0 | 6.69 | 0.70 | 0.07 | 0.9 | 3.20 | 0.20 | 0.20 | NaN | NaN | NaN | NaN | NaN | 2.50 | 1.2 | 6.10 | 0.07 | 124.40 | 283.5 | 34.4 | 8.0 | 17.5 | 16.3 | 81.6 | 2.90 | 124.6 | 13.1 | 2.70 | 63.6 | 2.20 | 3.1 | 36.40 | 5.0 | 2.000000e+00 | 1.0 | 5.397605e-79 | 2.0 | 1.0 | 1.000000e+00 | 1.0 | 1.0 |
| 40 | 83897 | 0 | 5.11 | 5.22 | 2.0 | 2.0 | NaN | NaN | 2.0 | 29.0 | 3.0 | 5.0 | 7.0 | 2025.0 | 164.81 | 30.50 | 95.96 | 15.6 | 3.045000e+03 | 30.7 | 0.50 | 0.59 | 0.20 | 1.30 | 92.0 | 13.46 | NaN | NaN | NaN | NaN | NaN | NaN | 268.0 | 311.0 | 40.0 | 74.0 | 71.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.0 | 2.000000e+00 | 1.0 | 5.397605e-79 | 2.0 | 1.0 | 2.000000e+00 | 1.0 | 1.0 |
| 41 | 83908 | 0 | 5.66 | 5.88 | 2.0 | 2.0 | NaN | NaN | 1.0 | 51.0 | 4.0 | 3.0 | 6.0 | 1466.0 | 231.49 | 85.91 | 37.70 | 11.5 | 9.450000e+02 | 24.7 | 0.17 | 0.73 | 5.75 | NaN | 102.0 | 7.73 | 0.07 | 0.07 | 1.3 | 1.70 | 0.90 | 1.30 | NaN | NaN | NaN | NaN | NaN | 2.10 | 3.8 | 10.10 | 0.07 | 0.71 | 16.2 | 7.3 | 2.1 | 4.9 | 11.2 | 45.8 | 1.10 | 127.3 | 7.5 | 2.00 | 63.9 | 0.64 | 4.4 | 4.70 | 1.0 | 5.397605e-79 | 2.0 | 5.397605e-79 | 2.0 | 1.0 | 2.000000e+00 | 1.0 | 2.0 |
| 42 | 83909 | 0 | 5.77 | 5.55 | 2.0 | 2.0 | NaN | NaN | 2.0 | 49.0 | 3.0 | 4.0 | 77.0 | 3197.0 | 186.96 | 19.20 | 155.03 | 10.4 | 5.397605e-79 | 37.8 | 0.59 | 0.69 | 0.61 | NaN | 104.0 | 16.63 | 0.07 | 0.07 | 0.4 | 0.70 | 0.20 | 0.10 | NaN | NaN | NaN | NaN | NaN | 3.20 | 1.2 | 1051.40 | 1.30 | 25.70 | 310.4 | 84.2 | 0.9 | 2.4 | 3.7 | 12.3 | 0.28 | 20.0 | 2.5 | 0.57 | 3.3 | 0.64 | 1.9 | 5.60 | NaN | 5.397605e-79 | 1.0 | NaN | 2.0 | 2.0 | 1.000000e+00 | 1.0 | 7.0 |
| 43 | 83911 | 1 | 18.40 | 0.00 | 2.0 | 2.0 | NaN | 2.0 | 2.0 | 43.0 | 4.0 | 4.0 | 14.0 | 1664.0 | 204.07 | 74.96 | 72.87 | 14.8 | 3.042000e+03 | 30.7 | NaN | NaN | NaN | NaN | 331.0 | 7.03 | 0.07 | 0.07 | 0.2 | 0.07 | 0.07 | 0.07 | NaN | NaN | NaN | NaN | NaN | 1.10 | 1.2 | 0.28 | 0.07 | 5.90 | 28.9 | 9.7 | 0.6 | 14.9 | 8.7 | 5.4 | 3.10 | 66.7 | 4.2 | 1.10 | 5.1 | 1.10 | 3.2 | 1.60 | 1.0 | 3.000000e+00 | 2.0 | 5.397605e-79 | 2.0 | 1.0 | 5.397605e-79 | 2.0 | 2.0 |
| 44 | 83919 | 0 | 4.94 | 4.27 | 2.0 | 2.0 | NaN | NaN | 1.0 | 19.0 | 1.0 | NaN | 6.0 | 1053.0 | 98.48 | 0.67 | 59.09 | 8.2 | 7.605000e+02 | 21.6 | 0.18 | 0.41 | 0.20 | 1.31 | 89.0 | 2.47 | NaN | NaN | NaN | NaN | NaN | NaN | 1650.0 | 2190.0 | 110.0 | 270.0 | 56.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 5.0 | 5.397605e-79 | 1.0 | 2.000000e+00 | 1.0 | NaN | 2.000000e+00 | 1.0 | 1.0 |
| 45 | 83931 | 0 | 5.44 | 0.00 | 2.0 | 2.0 | NaN | NaN | 1.0 | 37.0 | 2.0 | 2.0 | 7.0 | 1895.0 | 259.34 | 142.98 | 52.45 | 4.5 | 5.397605e-79 | 26.0 | 0.49 | 1.75 | 1.22 | NaN | 98.0 | 7.72 | 0.07 | 0.07 | 2.3 | 1.50 | 0.20 | 0.30 | NaN | NaN | NaN | NaN | NaN | 1.60 | 2.3 | 36.40 | 0.07 | 1.70 | 755.9 | 96.5 | 1.1 | 8.2 | 9.9 | 17.7 | 1.20 | 54.1 | 7.4 | 1.10 | 20.2 | 0.64 | 4.6 | 9.50 | 2.0 | 3.000000e+00 | 1.0 | 1.000000e+00 | 2.0 | 1.0 | 5.397605e-79 | 2.0 | 1.0 |
| 46 | 83934 | 0 | 4.94 | 5.16 | 2.0 | 2.0 | NaN | 2.0 | 2.0 | 27.0 | 4.0 | 3.0 | 3.0 | NaN | NaN | NaN | NaN | NaN | NaN | 17.2 | 0.14 | 0.22 | 0.52 | NaN | 89.0 | 5.83 | 0.07 | 0.07 | 1.5 | 1.10 | 0.07 | 0.07 | NaN | NaN | NaN | NaN | NaN | 0.30 | 1.2 | 4.00 | 0.07 | 0.71 | 33.0 | 7.6 | 0.5 | 1.8 | 2.8 | 44.1 | 0.40 | 17.2 | 2.0 | 0.57 | 13.9 | 0.64 | 1.5 | 36.10 | NaN | 1.000000e+00 | 2.0 | 5.397605e-79 | 2.0 | 2.0 | 2.000000e+00 | 2.0 | 2.0 |
| 47 | 83936 | 0 | 5.22 | 7.11 | NaN | NaN | NaN | NaN | 1.0 | 12.0 | 3.0 | NaN | 15.0 | 2407.0 | 392.35 | 150.44 | 73.82 | 36.0 | 2.022000e+03 | 23.5 | NaN | NaN | NaN | NaN | 94.0 | 8.35 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 4.000000e+00 | NaN | NaN | NaN | NaN | 3.000000e+00 | 1.0 | 1.0 |
| 48 | 83943 | 0 | 5.33 | 4.61 | 2.0 | 2.0 | NaN | NaN | 2.0 | 17.0 | 1.0 | NaN | 4.0 | 2114.0 | 212.53 | 46.03 | 102.80 | 26.0 | 9.750000e+02 | 26.8 | NaN | NaN | NaN | NaN | 96.0 | 12.85 | 0.07 | 0.07 | 0.3 | 0.40 | 0.07 | 0.07 | NaN | NaN | NaN | NaN | NaN | 0.40 | 80.4 | 50.70 | 0.07 | 0.71 | 4.5 | 1.5 | 0.5 | 7.8 | 1.2 | 1.8 | 0.28 | 6.3 | 0.6 | 0.57 | 2.9 | 0.64 | 0.4 | 0.21 | NaN | 5.397605e-79 | NaN | NaN | 2.0 | NaN | 1.000000e+00 | 1.0 | 1.0 |
| 49 | 83985 | 0 | 4.77 | 0.00 | 2.0 | 2.0 | NaN | NaN | 1.0 | 55.0 | 3.0 | 5.0 | 15.0 | NaN | NaN | NaN | NaN | NaN | NaN | 18.5 | NaN | NaN | NaN | NaN | 86.0 | 2.51 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.000000e+00 | 2.0 | NaN | 2.0 | 1.0 | 5.397605e-79 | 1.0 | 1.0 |
| 50 | 83988 | 0 | 6.11 | 5.22 | 2.0 | 2.0 | NaN | NaN | 1.0 | 21.0 | 1.0 | 3.0 | 8.0 | 3083.0 | 323.89 | 82.30 | 126.09 | 34.2 | 5.760000e+03 | 33.0 | NaN | NaN | NaN | NaN | 110.0 | 13.94 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 5.0 | 5.000000e+00 | 1.0 | 5.397605e-79 | 2.0 | 2.0 | 2.000000e+00 | 1.0 | 2.0 |
| 51 | 83991 | 0 | 5.44 | 7.16 | NaN | NaN | NaN | NaN | 2.0 | 14.0 | 5.0 | NaN | 6.0 | 1419.0 | 202.27 | 87.36 | 41.14 | 7.6 | 6.750000e+02 | 30.4 | 0.15 | 0.46 | 0.20 | 5.36 | 98.0 | 17.44 | NaN | NaN | NaN | NaN | NaN | NaN | 387.0 | 2400.0 | 33.0 | 69.0 | 52.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.000000e+00 | NaN | NaN | NaN | NaN | 1.000000e+00 | 1.0 | 1.0 |
| 52 | 83995 | 1 | 6.22 | 11.20 | 2.0 | 2.0 | NaN | NaN | 1.0 | 43.0 | 4.0 | 4.0 | 15.0 | 2362.0 | 103.39 | 30.58 | 103.12 | 3.2 | 5.397605e-79 | 43.2 | NaN | NaN | NaN | NaN | 112.0 | 41.03 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 3.0 | 3.000000e+00 | 2.0 | 5.397605e-79 | 1.0 | 1.0 | 2.000000e+00 | 1.0 | 1.0 |
| 53 | 84018 | 0 | 5.83 | 0.00 | 2.0 | 2.0 | NaN | NaN | 1.0 | 27.0 | 3.0 | 5.0 | 14.0 | 2806.0 | 338.34 | 115.88 | 114.59 | 14.3 | 1.800000e+03 | 29.6 | 0.51 | 0.51 | 2.02 | 1.97 | 105.0 | 11.55 | NaN | NaN | NaN | NaN | NaN | NaN | 2640.0 | 7270.0 | 231.0 | 479.0 | 104.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 8.0 | 5.397605e-79 | 1.0 | 5.397605e-79 | 1.0 | 1.0 | 3.000000e+00 | 1.0 | 2.0 |
| 54 | 84029 | 0 | 5.16 | 5.77 | 2.0 | 2.0 | NaN | 2.0 | 2.0 | 28.0 | 1.0 | 3.0 | NaN | 1812.0 | 243.64 | 100.74 | 55.31 | 11.0 | 1.374000e+03 | 20.3 | 0.17 | 0.14 | 0.20 | 3.62 | 93.0 | 7.20 | NaN | NaN | NaN | NaN | NaN | NaN | 267.0 | 5260.0 | 22.0 | 116.0 | 91.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | 5.397605e-79 | 2.0 | 5.397605e-79 | 2.0 | 1.0 | 5.397605e-79 | 2.0 | NaN |
| 55 | 84030 | 0 | 6.38 | 8.66 | 2.0 | 2.0 | NaN | NaN | 1.0 | 46.0 | 1.0 | 1.0 | 77.0 | 742.0 | 96.09 | 42.70 | 21.52 | 5.1 | 3.600000e+02 | 25.1 | 0.20 | 1.16 | 1.44 | NaN | 115.0 | 10.78 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 10.0 | 3.000000e+00 | 2.0 | 5.397605e-79 | 2.0 | 2.0 | 2.000000e+00 | 2.0 | 2.0 |
| 56 | 84032 | 0 | 5.03 | 4.00 | 2.0 | 2.0 | NaN | NaN | 1.0 | 51.0 | 1.0 | 4.0 | 77.0 | 1515.0 | 126.93 | 53.95 | 46.50 | 11.4 | 3.840000e+03 | 26.8 | NaN | NaN | NaN | NaN | 91.0 | 8.40 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 3.0 | 2.000000e+00 | 1.0 | 5.397605e-79 | 2.0 | 2.0 | 5.397605e-79 | 2.0 | 2.0 |
| 57 | 84033 | 0 | 5.22 | 5.16 | 2.0 | 2.0 | NaN | 2.0 | 2.0 | 41.0 | 5.0 | 4.0 | 9.0 | 1995.0 | 243.05 | 124.18 | 85.91 | 11.2 | 1.080000e+03 | 33.6 | 0.21 | 0.48 | 0.20 | 3.24 | 94.0 | 8.20 | NaN | NaN | NaN | NaN | NaN | NaN | 469.0 | 1390.0 | 42.0 | 103.0 | 69.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | 5.397605e-79 | 1.0 | 5.397605e-79 | 1.0 | 2.0 | 2.000000e+00 | 2.0 | 1.0 |
| 58 | 84039 | 0 | 5.88 | 5.50 | 2.0 | 2.0 | NaN | NaN | 1.0 | 36.0 | 3.0 | 2.0 | 7.0 | 4184.0 | 470.50 | 215.12 | 201.79 | 32.1 | 5.397605e-79 | 28.2 | NaN | NaN | NaN | NaN | 106.0 | 11.95 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 4.0 | 5.000000e+00 | 2.0 | 5.397605e-79 | 2.0 | 2.0 | 1.000000e+00 | 1.0 | 1.0 |
| 59 | 84042 | 0 | 5.61 | 7.27 | 2.0 | 2.0 | NaN | NaN | 1.0 | 75.0 | 3.0 | 2.0 | 6.0 | 2744.0 | 341.20 | 168.44 | 63.50 | 13.3 | 5.397605e-79 | 19.3 | 0.92 | 2.55 | 0.20 | 4.72 | 101.0 | 3.04 | NaN | NaN | NaN | NaN | NaN | NaN | 2330.0 | 3070.0 | 135.0 | 222.0 | 100.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 6.0 | 5.000000e+00 | 1.0 | 5.397605e-79 | 2.0 | 1.0 | 5.397605e-79 | 1.0 | 1.0 |
| 60 | 84047 | 0 | 6.22 | 6.77 | 2.0 | 2.0 | NaN | NaN | 2.0 | 72.0 | 5.0 | 5.0 | 15.0 | 1655.0 | 251.02 | 120.96 | 45.54 | 16.6 | 5.850000e+02 | 22.1 | 0.47 | 0.92 | 2.74 | NaN | 112.0 | 17.55 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.000000e+00 | 2.0 | 5.397605e-79 | 2.0 | 2.0 | 1.000000e+00 | 1.0 | 1.0 |
| 61 | 84051 | 0 | 5.31 | 3.44 | 2.0 | 2.0 | NaN | NaN | 2.0 | 55.0 | 1.0 | 5.0 | 9.0 | 1779.0 | 232.80 | 79.64 | 65.42 | 16.5 | 9.600000e+02 | 32.5 | 2.08 | 1.88 | 0.38 | NaN | 96.0 | 6.23 | 0.20 | 0.07 | 1.0 | 1.20 | 0.07 | 0.07 | NaN | NaN | NaN | NaN | NaN | 0.14 | 4.8 | 88.40 | 0.40 | 0.71 | 24.7 | 1.2 | 0.7 | 3.4 | 3.3 | 3.0 | 0.28 | 39.7 | 1.5 | 0.57 | 11.0 | 0.64 | 1.0 | 2.90 | 1.0 | 4.000000e+00 | 1.0 | 5.397605e-79 | 2.0 | 9.0 | 5.397605e-79 | 1.0 | 1.0 |
| 62 | 84056 | 0 | 5.44 | 7.22 | NaN | NaN | NaN | NaN | 2.0 | 14.0 | 5.0 | NaN | 10.0 | 1399.0 | 201.74 | 21.73 | 51.50 | 6.6 | 1.521000e+03 | 28.6 | 0.63 | 0.62 | 0.96 | 15.25 | 98.0 | 16.53 | NaN | NaN | NaN | NaN | NaN | NaN | 193.0 | 1640.0 | 18.0 | 47.0 | 27.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 5.000000e+00 | NaN | NaN | NaN | NaN | 5.397605e-79 | 2.0 | 2.0 |
| 63 | 84058 | 0 | 4.94 | 0.00 | 2.0 | 2.0 | NaN | NaN | 2.0 | 21.0 | 3.0 | 4.0 | 9.0 | NaN | NaN | NaN | NaN | NaN | NaN | 23.1 | 0.18 | 0.40 | 0.20 | NaN | 89.0 | 3.56 | 0.07 | 0.07 | 0.4 | 0.60 | 0.10 | 0.07 | NaN | NaN | NaN | NaN | NaN | 4.80 | 9.1 | 64.20 | 18.50 | 17.50 | 499.3 | 623.9 | 35.2 | 36.4 | 14.5 | 13.1 | 7.20 | 74.2 | 11.0 | 1.00 | 18.0 | 1.90 | 7.4 | 5.20 | 3.0 | 2.000000e+00 | 2.0 | 5.397605e-79 | 1.0 | 2.0 | 6.000000e+00 | 1.0 | 2.0 |
| 64 | 84061 | 0 | 5.25 | 0.00 | 2.0 | 2.0 | NaN | NaN | 2.0 | 50.0 | 3.0 | 4.0 | 9.0 | 2282.0 | 239.24 | 117.41 | 107.61 | 20.1 | 5.397605e-79 | 27.2 | 0.28 | 0.85 | 1.95 | NaN | 95.0 | 7.74 | 0.07 | 0.07 | 0.6 | 0.60 | 0.20 | 0.07 | NaN | NaN | NaN | NaN | NaN | 0.80 | 33.8 | 4.00 | 0.07 | 0.71 | 11.3 | 1.8 | 0.3 | 1.8 | 2.2 | 1.9 | 0.28 | 7.4 | 1.5 | 0.90 | 1.4 | 0.64 | 0.9 | 4.20 | NaN | 5.000000e+00 | 1.0 | 5.397605e-79 | 1.0 | 2.0 | 6.000000e+00 | 1.0 | 1.0 |
| 65 | 84062 | 1 | 9.16 | 16.60 | 2.0 | 2.0 | NaN | NaN | 2.0 | 45.0 | 1.0 | 4.0 | 3.0 | 3449.0 | 386.92 | 121.48 | 177.37 | 45.7 | 6.300000e+02 | 33.9 | 0.51 | 0.41 | 0.72 | NaN | 165.0 | 12.36 | 0.07 | 0.07 | 0.4 | 0.40 | 0.20 | 0.07 | NaN | NaN | NaN | NaN | NaN | 0.50 | 5.2 | 30.80 | 0.20 | 0.71 | 83.7 | 13.2 | 0.9 | 3.9 | 10.6 | 6.2 | 0.28 | 6.1 | 6.0 | 1.20 | 6.0 | 0.64 | 3.7 | 2.00 | NaN | 1.000000e+00 | 2.0 | 5.397605e-79 | 2.0 | 2.0 | 5.397605e-79 | 1.0 | 2.0 |
| 66 | 84066 | 0 | 5.44 | 5.88 | NaN | NaN | NaN | NaN | 2.0 | 14.0 | 3.0 | NaN | 15.0 | 2806.0 | 349.75 | 92.54 | 101.37 | 16.8 | 1.365000e+03 | 18.7 | NaN | NaN | NaN | NaN | 98.0 | 8.00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 5.397605e-79 | 1.0 | 1.0 |
| 67 | 84070 | 0 | 4.77 | 6.83 | 2.0 | 2.0 | NaN | NaN | 1.0 | 40.0 | 3.0 | 4.0 | 12.0 | 2159.0 | 181.17 | 7.02 | 18.25 | 4.3 | 3.600000e+02 | 29.4 | 3.39 | 1.78 | 2.89 | NaN | 86.0 | 2.65 | 0.07 | 0.07 | 3.0 | 3.50 | 0.10 | 0.07 | NaN | NaN | NaN | NaN | NaN | 0.40 | 1.2 | 3.40 | 0.07 | 13.50 | 17.5 | 1.8 | 1.1 | 4.1 | 3.1 | 6.4 | 0.80 | 6.3 | 2.3 | 0.57 | 3.4 | 0.64 | 0.9 | 6.00 | 3.0 | 2.000000e+00 | 1.0 | 5.397605e-79 | 2.0 | 2.0 | 5.397605e-79 | 2.0 | 2.0 |
| 68 | 84073 | 0 | 5.71 | 7.11 | 2.0 | 2.0 | NaN | 2.0 | 2.0 | 39.0 | 2.0 | 3.0 | 10.0 | 2851.0 | 365.52 | 180.11 | 121.51 | 28.1 | 5.397605e-79 | 31.9 | 0.15 | 0.25 | 0.29 | 2.15 | 103.0 | 22.59 | NaN | NaN | NaN | NaN | NaN | NaN | 1040.0 | 16500.0 | 76.0 | 334.0 | 89.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.0 | 5.397605e-79 | 1.0 | 5.397605e-79 | 2.0 | 1.0 | 5.397605e-79 | 1.0 | 1.0 |
| 69 | 84087 | 0 | 5.33 | 5.55 | 2.0 | 2.0 | NaN | NaN | 1.0 | 36.0 | 5.0 | 5.0 | 15.0 | 2375.0 | 201.49 | 95.13 | 151.41 | 23.8 | 5.397605e-79 | 22.6 | 0.34 | 0.98 | 4.21 | 11.85 | 96.0 | 2.00 | NaN | NaN | NaN | NaN | NaN | NaN | 340.0 | 1140.0 | 87.0 | 134.0 | 94.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 3.0 | 1.000000e+00 | 1.0 | 5.397605e-79 | 2.0 | 2.0 | 2.000000e+00 | 1.0 | 2.0 |
| 70 | 84100 | 0 | 5.88 | 7.38 | NaN | NaN | NaN | NaN | 1.0 | 13.0 | 2.0 | NaN | 3.0 | 2304.0 | 259.13 | 89.52 | 102.93 | 19.4 | 8.700000e+02 | 28.8 | 0.07 | 0.48 | 2.03 | NaN | 106.0 | 55.43 | 0.07 | 0.07 | 0.4 | 0.70 | 0.10 | 0.07 | NaN | NaN | NaN | NaN | NaN | 0.70 | 1.2 | 67.20 | 0.07 | 0.71 | 17.1 | 1.0 | 2.1 | 6.7 | 14.3 | 11.5 | 0.70 | 296.0 | 5.4 | 1.10 | 15.9 | 0.64 | 4.8 | 9.90 | NaN | 2.000000e+00 | NaN | NaN | NaN | NaN | 1.000000e+00 | 2.0 | 1.0 |
| 71 | 84102 | 0 | 5.05 | 6.38 | NaN | NaN | NaN | NaN | 2.0 | 13.0 | 4.0 | NaN | 6.0 | 466.0 | 58.33 | 6.27 | 16.96 | 4.0 | 4.350000e+02 | 33.6 | NaN | NaN | NaN | NaN | 91.0 | 29.07 | 0.07 | 0.07 | 0.2 | 1.50 | 0.07 | 0.07 | NaN | NaN | NaN | NaN | NaN | 0.90 | 1.2 | 5.40 | 0.07 | 0.71 | 2.8 | 0.5 | 1.5 | 1.8 | 2.3 | 10.0 | 0.60 | 5.5 | 2.4 | 0.57 | 13.2 | 0.64 | 1.6 | 2.40 | NaN | 2.000000e+00 | NaN | NaN | NaN | NaN | 1.000000e+00 | 1.0 | 2.0 |
| 72 | 84106 | 0 | 5.50 | 0.00 | 2.0 | 2.0 | NaN | NaN | 1.0 | 57.0 | 5.0 | 5.0 | 15.0 | NaN | NaN | NaN | NaN | NaN | NaN | 24.6 | 0.66 | 0.90 | 8.13 | 58.99 | 99.0 | 3.55 | NaN | NaN | NaN | NaN | NaN | NaN | 1400.0 | 4280.0 | 96.0 | 157.0 | 70.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.000000e+00 | 1.0 | NaN | 2.0 | 2.0 | 1.000000e+00 | 1.0 | 1.0 |
| 73 | 84121 | 0 | 4.74 | 5.77 | 2.0 | 2.0 | NaN | NaN | 1.0 | 18.0 | 1.0 | NaN | 10.0 | 1078.0 | 154.32 | 40.60 | 35.62 | 13.5 | 7.200000e+02 | 26.6 | 0.07 | 0.24 | 0.20 | 3.72 | 85.0 | 19.19 | NaN | NaN | NaN | NaN | NaN | NaN | 471.0 | 13000.0 | 154.0 | 349.0 | 146.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 3.000000e+00 | 2.0 | 5.397605e-79 | 2.0 | NaN | 3.000000e+00 | 1.0 | 1.0 |
| 74 | 84127 | 0 | 4.91 | 5.44 | NaN | NaN | NaN | NaN | 2.0 | 12.0 | 1.0 | NaN | 10.0 | 1148.0 | 183.80 | 84.24 | 36.92 | 9.2 | 5.397605e-79 | 19.9 | 0.07 | 0.27 | 0.20 | 4.83 | 88.0 | 16.26 | NaN | NaN | NaN | NaN | NaN | NaN | 464.0 | 14000.0 | 120.0 | 639.0 | 113.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 5.000000e+00 | NaN | NaN | NaN | NaN | 3.000000e+00 | 1.0 | 1.0 |
| 75 | 84130 | 0 | 5.00 | 2.00 | 2.0 | 2.0 | NaN | 2.0 | 2.0 | 26.0 | 3.0 | 3.0 | 10.0 | 1470.0 | 140.87 | 62.64 | 69.19 | 10.5 | 3.885000e+03 | 20.5 | NaN | NaN | NaN | NaN | 90.0 | 8.18 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.0 | 5.000000e+00 | 1.0 | 5.397605e-79 | 2.0 | 1.0 | 1.000000e+00 | 1.0 | 1.0 |
| 76 | 84134 | 0 | 5.33 | 4.39 | NaN | NaN | NaN | NaN | 1.0 | 13.0 | 3.0 | NaN | 15.0 | 1395.0 | 192.49 | 41.78 | 48.42 | 11.6 | 1.455000e+03 | 18.3 | NaN | NaN | NaN | NaN | 96.0 | 10.63 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 5.397605e-79 | NaN | NaN | NaN | NaN | 2.000000e+00 | 1.0 | 1.0 |
| 77 | 84142 | 0 | 4.50 | 6.94 | 2.0 | 2.0 | NaN | NaN | 1.0 | 24.0 | 3.0 | 3.0 | 15.0 | 6223.0 | 706.87 | 430.14 | 279.25 | 39.0 | 1.545000e+03 | 32.8 | 0.18 | 0.58 | 0.37 | 5.01 | 81.0 | 38.27 | NaN | NaN | NaN | NaN | NaN | NaN | 662.0 | 17100.0 | 61.0 | 148.0 | 134.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 3.0 | 5.000000e+00 | 2.0 | 5.397605e-79 | 2.0 | 2.0 | 1.000000e+00 | 1.0 | 2.0 |
| 78 | 84143 | 0 | 6.11 | 6.88 | 2.0 | 2.0 | NaN | 2.0 | 2.0 | 35.0 | 5.0 | 1.0 | 9.0 | 1786.0 | 166.78 | 45.53 | 81.53 | 11.7 | 8.400000e+02 | 28.9 | 2.37 | 0.71 | 0.33 | 0.64 | 110.0 | 11.50 | NaN | NaN | NaN | NaN | NaN | NaN | 4830.0 | 4600.0 | 163.0 | 305.0 | 39.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 5.397605e-79 | 1.0 | 5.397605e-79 | 2.0 | 1.0 | 2.000000e+00 | 1.0 | 2.0 |
| 79 | 84151 | 0 | 4.77 | 0.00 | 2.0 | 2.0 | NaN | NaN | 2.0 | 53.0 | 3.0 | 5.0 | 15.0 | 1721.0 | 75.88 | 18.45 | 106.42 | 16.2 | 1.521000e+03 | 25.3 | NaN | NaN | NaN | NaN | 86.0 | 2.95 | 0.07 | 0.07 | 2.7 | 1.30 | 0.30 | 0.20 | NaN | NaN | NaN | NaN | NaN | 0.60 | 1.2 | 1430.80 | 0.07 | 11.10 | 31.7 | 8.7 | 0.2 | 0.9 | 3.7 | 4.2 | 0.28 | 12.5 | 2.0 | 1.30 | 2.7 | 0.64 | 1.3 | 0.60 | 2.0 | 5.397605e-79 | 2.0 | 5.397605e-79 | 2.0 | 2.0 | 3.000000e+00 | 1.0 | 1.0 |
| 80 | 84168 | 1 | 8.10 | 0.00 | 2.0 | 2.0 | NaN | NaN | 1.0 | 61.0 | 5.0 | 1.0 | 5.0 | NaN | NaN | NaN | NaN | NaN | NaN | 16.4 | NaN | NaN | NaN | NaN | 146.0 | 6.12 | 0.07 | 0.07 | 1.2 | 3.00 | 0.80 | 0.40 | NaN | NaN | NaN | NaN | NaN | 3.10 | 1.2 | 1.70 | 0.07 | 0.71 | 132.8 | 2.8 | 0.6 | 3.2 | 6.3 | 13.9 | 0.40 | 17.8 | 3.6 | 0.57 | 12.1 | 0.64 | 2.0 | 6.80 | NaN | 4.000000e+00 | 1.0 | 5.397605e-79 | 2.0 | 2.0 | 5.397605e-79 | 2.0 | 2.0 |
| 81 | 84181 | 0 | 5.77 | 0.00 | 2.0 | 2.0 | NaN | NaN | 1.0 | 35.0 | 4.0 | 3.0 | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | 30.4 | NaN | NaN | NaN | NaN | 104.0 | 8.69 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.0 | 8.000000e+00 | 1.0 | 3.000000e+00 | 2.0 | 1.0 | 2.000000e+00 | 2.0 | 2.0 |
| 82 | 84191 | 0 | 5.27 | 5.61 | NaN | NaN | NaN | NaN | 2.0 | 12.0 | 4.0 | NaN | 7.0 | 425.0 | 51.16 | 4.28 | 16.57 | 2.7 | 6.150000e+02 | 26.5 | 0.07 | 0.40 | 0.20 | 1.65 | 95.0 | 13.09 | NaN | NaN | NaN | NaN | NaN | NaN | 418.0 | 9440.0 | 77.0 | 186.0 | 81.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.000000e+00 | 1.0 | 2.0 |
| 83 | 84195 | 0 | 6.22 | 9.44 | 2.0 | 2.0 | NaN | NaN | 1.0 | 45.0 | 4.0 | 4.0 | 5.0 | 1560.0 | 208.14 | 95.93 | 68.05 | 9.3 | 1.051990e+03 | 31.6 | NaN | NaN | NaN | NaN | 112.0 | 10.46 | 0.20 | 0.07 | 0.3 | 2.20 | 0.07 | 0.07 | NaN | NaN | NaN | NaN | NaN | 1.60 | 4.2 | 9.40 | 0.07 | 0.71 | 6.9 | 2.5 | 2.2 | 64.2 | 17.2 | 13.7 | 5.60 | 49.9 | 8.6 | 2.60 | 27.8 | 1.60 | 5.8 | 13.90 | NaN | 1.000000e+00 | 2.0 | 1.000000e+00 | 2.0 | 1.0 | 2.000000e+00 | 1.0 | 2.0 |
| 84 | 84196 | 0 | 5.72 | 3.39 | 2.0 | 2.0 | NaN | NaN | 1.0 | 32.0 | 5.0 | 2.0 | 14.0 | 2111.0 | 357.14 | 95.73 | 39.77 | 16.5 | 7.800000e+02 | 23.5 | NaN | NaN | NaN | NaN | 103.0 | 6.07 | 0.07 | 0.07 | 0.7 | 0.80 | 0.20 | 0.20 | NaN | NaN | NaN | NaN | NaN | 1.20 | 2.0 | 5.30 | 0.07 | 0.71 | 5.5 | 0.3 | 0.7 | 13.7 | 24.6 | 18.0 | 0.70 | 24.0 | 16.9 | 7.00 | 5.1 | 2.20 | 9.8 | 5.20 | 3.0 | 3.000000e+00 | 1.0 | 5.397605e-79 | 2.0 | 1.0 | 5.397605e-79 | 1.0 | 1.0 |
| 85 | 84197 | 0 | 6.83 | 8.27 | 2.0 | 2.0 | NaN | NaN | 1.0 | 59.0 | 3.0 | 5.0 | 15.0 | 1485.0 | 131.36 | 68.70 | 51.28 | 7.6 | 5.397605e-79 | 35.4 | 0.17 | 0.93 | 1.22 | 239.58 | 123.0 | 16.82 | NaN | NaN | NaN | NaN | NaN | NaN | 15000.0 | 16600.0 | 1230.0 | 1040.0 | 1310.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | 4.000000e+00 | 2.0 | 5.397605e-79 | 2.0 | 2.0 | 3.000000e+00 | 1.0 | 1.0 |
| 86 | 84221 | 0 | 5.11 | 5.16 | 2.0 | 2.0 | NaN | NaN | 2.0 | 16.0 | 3.0 | NaN | 8.0 | 1521.0 | 256.45 | 148.59 | 45.18 | 11.1 | 1.620000e+03 | 31.6 | NaN | NaN | NaN | NaN | 92.0 | 5.82 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 5.397605e-79 | NaN | NaN | 2.0 | NaN | 5.397605e-79 | 2.0 | 1.0 |
| 87 | 84243 | 0 | 5.27 | 5.88 | 2.0 | 2.0 | NaN | NaN | 2.0 | 16.0 | 5.0 | NaN | 7.0 | 656.0 | 71.78 | 23.40 | 20.67 | 2.6 | 2.535000e+02 | 26.5 | 0.27 | 0.58 | 4.50 | NaN | 95.0 | 14.36 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 5.397605e-79 | NaN | NaN | 2.0 | NaN | 5.397605e-79 | 1.0 | 1.0 |
| 88 | 84245 | 0 | 5.72 | 6.72 | 2.0 | 2.0 | NaN | NaN | 2.0 | 43.0 | 3.0 | 3.0 | 15.0 | 1794.0 | 134.75 | 72.21 | 105.83 | 12.7 | 2.010000e+03 | 28.9 | 0.19 | 0.75 | 0.45 | NaN | 103.0 | 7.07 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 8.000000e+00 | 2.0 | NaN | 2.0 | 2.0 | 5.397605e-79 | 1.0 | 1.0 |
| 89 | 84251 | 0 | 4.94 | 0.00 | 2.0 | 2.0 | NaN | NaN | 1.0 | 20.0 | 4.0 | 2.0 | 6.0 | NaN | NaN | NaN | NaN | NaN | NaN | 22.2 | NaN | NaN | NaN | NaN | 89.0 | 1.71 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 3.0 | 1.000000e+00 | 2.0 | 2.000000e+00 | 1.0 | 2.0 | 1.000000e+00 | 2.0 | 2.0 |
| 90 | 84269 | 0 | 5.72 | 4.61 | 2.0 | 2.0 | NaN | NaN | 1.0 | 26.0 | 5.0 | 4.0 | 4.0 | 1946.0 | 277.89 | 217.33 | 65.57 | 7.4 | 8.700000e+02 | 22.1 | NaN | NaN | NaN | NaN | 103.0 | 3.71 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.0 | 2.000000e+00 | 1.0 | 1.000000e+00 | 1.0 | 2.0 | 5.397605e-79 | 1.0 | 2.0 |
| 91 | 84270 | 0 | 5.83 | 7.55 | NaN | NaN | NaN | NaN | 1.0 | 12.0 | 3.0 | NaN | 2.0 | 2101.0 | 278.46 | 160.27 | 73.33 | 8.1 | 4.800000e+02 | 19.8 | 0.13 | 0.68 | 0.20 | 1.76 | 105.0 | 9.09 | NaN | NaN | NaN | NaN | NaN | NaN | 495.0 | 4300.0 | 38.0 | 91.0 | 41.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.000000e+00 | NaN | NaN | NaN | NaN | 2.000000e+00 | 1.0 | 2.0 |
| 92 | 84273 | 0 | 5.94 | 0.00 | 2.0 | 2.0 | NaN | NaN | 1.0 | 63.0 | 5.0 | 2.0 | 6.0 | 1493.0 | 128.16 | 42.25 | 64.47 | 11.7 | 4.950000e+02 | 22.1 | NaN | NaN | NaN | NaN | 107.0 | 5.39 | 0.07 | 0.07 | 2.6 | 2.10 | 1.70 | 1.30 | NaN | NaN | NaN | NaN | NaN | 0.70 | 2.0 | 5136.60 | 0.90 | 482.80 | 916.9 | 31.7 | 3.3 | 5.6 | 24.8 | 192.5 | 2.40 | 806.7 | 15.4 | 11.50 | 36.2 | 1.90 | 10.1 | 11.80 | NaN | 2.000000e+00 | 1.0 | NaN | 2.0 | 2.0 | 4.000000e+00 | 1.0 | 2.0 |
| 93 | 84278 | 0 | 5.66 | 7.22 | NaN | NaN | NaN | NaN | 2.0 | 14.0 | 2.0 | NaN | 6.0 | 1857.0 | 195.63 | 70.50 | 96.79 | 13.4 | 5.397605e-79 | 29.6 | 0.20 | 0.39 | 0.20 | NaN | 102.0 | 15.54 | 0.07 | 0.07 | 0.2 | 0.50 | 0.07 | 0.07 | NaN | NaN | NaN | NaN | NaN | 0.40 | 22.0 | 24.70 | 0.10 | 1.20 | 61.5 | 13.8 | 3.2 | 12.0 | 3.7 | 7.3 | 1.30 | 15.4 | 2.8 | 0.57 | 4.5 | 1.20 | 1.7 | 5.80 | NaN | 2.000000e+00 | NaN | NaN | NaN | NaN | 2.000000e+00 | 1.0 | 2.0 |
| 94 | 84287 | 0 | 5.44 | 3.11 | 2.0 | 2.0 | NaN | NaN | 1.0 | 34.0 | 3.0 | 3.0 | 8.0 | 4497.0 | 436.58 | 244.52 | 193.05 | 21.5 | 5.397605e-79 | 25.6 | 0.56 | 3.48 | 0.70 | 2.36 | 98.0 | 5.02 | NaN | NaN | NaN | NaN | NaN | NaN | 4750.0 | 6890.0 | 367.0 | 512.0 | 104.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 6.0 | 3.000000e+00 | 1.0 | 5.397605e-79 | 2.0 | 1.0 | 1.000000e+00 | 1.0 | 3.0 |
| 95 | 84299 | 0 | 5.88 | 5.66 | 2.0 | 2.0 | NaN | NaN | 1.0 | 51.0 | 3.0 | 4.0 | 14.0 | 4911.0 | 617.44 | 347.51 | 210.35 | 25.5 | 5.070000e+02 | 35.0 | 0.47 | 0.78 | 0.33 | 3.68 | 106.0 | 30.17 | NaN | NaN | NaN | NaN | NaN | NaN | 8520.0 | 12900.0 | 800.0 | 1220.0 | 594.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 8.0 | 2.000000e+00 | 1.0 | 5.397605e-79 | 2.0 | 2.0 | 5.397605e-79 | 1.0 | 1.0 |
| 96 | 84302 | 0 | 5.83 | 0.00 | 2.0 | 2.0 | NaN | NaN | 1.0 | 74.0 | 3.0 | 3.0 | 9.0 | 1761.0 | 210.00 | 146.14 | 79.66 | 4.5 | 5.397605e-79 | 25.5 | NaN | NaN | NaN | NaN | 105.0 | 10.46 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 3.000000e+00 | 1.0 | 5.397605e-79 | 2.0 | 2.0 | 2.000000e+00 | 1.0 | 1.0 |
| 97 | 84321 | 0 | 6.05 | 5.16 | 2.0 | 2.0 | NaN | NaN | 2.0 | 62.0 | 1.0 | 4.0 | 8.0 | 2555.0 | 290.30 | 117.35 | 112.77 | 18.7 | 5.397605e-79 | 28.7 | 0.26 | 0.58 | 0.34 | 4.25 | 109.0 | 24.62 | NaN | NaN | NaN | NaN | NaN | NaN | 4350.0 | 5830.0 | 53.0 | 94.0 | 87.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.000000e+00 | 2.0 | 2.000000e+00 | 2.0 | 1.0 | 1.000000e+00 | 1.0 | 1.0 |
| 98 | 84324 | 0 | 6.77 | 0.00 | 2.0 | 2.0 | NaN | NaN | 1.0 | 59.0 | 3.0 | 2.0 | 4.0 | 5980.0 | 1222.34 | 980.92 | 85.19 | 10.1 | 5.397605e-79 | 27.3 | 1.53 | 1.59 | 0.28 | 8.15 | 122.0 | 11.78 | NaN | NaN | NaN | NaN | NaN | NaN | 16800.0 | 27600.0 | 1140.0 | 2080.0 | 335.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 4.000000e+00 | 1.0 | NaN | 2.0 | 2.0 | 2.000000e+00 | 1.0 | 2.0 |
| 99 | 84326 | 0 | 4.77 | 6.16 | NaN | NaN | NaN | NaN | 1.0 | 13.0 | 3.0 | NaN | 15.0 | 1221.0 | 157.54 | 86.09 | 52.04 | 6.1 | 4.200000e+02 | 16.8 | NaN | NaN | NaN | NaN | 86.0 | 4.17 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 8.000000e+00 | NaN | NaN | NaN | NaN | 5.397605e-79 | 2.0 | 1.0 |
# DID060 carries no information: confirm every entry is NaN before dropping it
print(nhanes_merged["DID060"].isnull().all())
# BPQ020, BPQ080 & RHD143 are dropped too (original note: they hold only "no" values)
nhanes_merged = nhanes_merged.drop(columns=["DID060", "BPQ020", "BPQ080", "RHD143"])
True
# Show the dtype of every column in the merged table
nhanes_merged.dtypes
SEQN int64 diabetes int64 LBDGLUSI float64 LBDGLTSI float64 RIAGENDR float64 RIDAGEYR float64 RIDRETH1 float64 DMDEDUC2 float64 INDHHIN2 float64 DR1TKCAL float64 DR1TCARB float64 DR1TSUGR float64 DR1TTFAT float64 DR1TFIBE float64 DR1_320Z float64 BMXBMI float64 LBXBCD float64 LBXBPB float64 LBXTHG float64 URXUAS float64 LBXGLU float64 LBXIN float64 LBXMPAH float64 LBXPFDO float64 LBXPFNA float64 LBXPFHS float64 LBXPFDE float64 LBXPFUA float64 URXP01 float64 URXP02 float64 URXP03 float64 URXP04 float64 URXP06 float64 URXBPH float64 URXTRS float64 URXBP3 float64 URXBUP float64 URXEPB float64 URXMPB float64 URXPPB float64 URXCNP float64 URXCOP float64 URXECP float64 URXMBP float64 URXMC1 float64 URXMEP float64 URXMHH float64 URXMHP float64 URXMIB float64 URXMNP float64 URXMOH float64 URXMZP float64 ALQ130 float64 PAQ710 float64 SMQ020 float64 DPQ010 float64 SLQ050 float64 MCQ300C float64 HUQ051 float64 HIQ011 float64 HOQ065 float64 dtype: object
# (number of rows, number of columns) of the merged table
nhanes_merged.shape
(10835, 61)
10835 rows (entries) by 61 columns (variables)
# True if any SEQN value occurs more than once in the merged table
nhanes_merged['SEQN'].duplicated().any()
False
No duplicate patients
Data Quality Discussion¶
Overall, creating the dataset was simple and straightforward. The only tedious task was downloading all of the datasets and importing them. There were some instances where the names of variables changed from year to year, such as income and the number of hours spent watching TV or videos over the past 30 days, but they were easy to look up in the respective NHANES data dictionary.
# converting categorical variables into binary & converting Refused and Don't Know entries into NaN
# Zero-coded answers show up as 5.397605e-79 in this table (see the printed rows),
# so zero is matched with np.isclose rather than ==.


def _binary_indicator(column, positive_codes, missing_codes=(7, 9)):
    """Return a 0/1 float indicator built from nhanes_merged[column].

    Codes listed in missing_codes (Refused / Don't Know) are turned into NaN
    first. Rows whose code is in positive_codes become 1.0, every other
    answered row becomes 0.0, and missing rows stay NaN.
    """
    answers = nhanes_merged[column].replace(list(missing_codes), np.nan)
    flagged = answers.isin(positive_codes).astype(float)
    # .where() writes the NaNs back reliably; the earlier chained
    # df[col][mask] = np.nan pattern can silently assign to a temporary copy.
    return flagged.where(answers.notna())


# Variables coded 1/2 are shifted to 0/1 after Refused (7) / Don't Know (9) become NaN:
## gender RIAGENDR 0 = Male, 1 = Female
## smoked at least 100 cigarettes in life SMQ020 0 = Yes, 1 = No
## family history of diabetes MCQ300C 0 = Yes, 1 = No
## health insurance status HIQ011 0 = Yes, 1 = No
## ever told doctor had trouble sleeping? SLQ050 0 = Yes, 1 = No
for shifted_column in ("RIAGENDR", "SMQ020", "MCQ300C", "HIQ011", "SLQ050"):
    nhanes_merged[shifted_column] = nhanes_merged[shifted_column].replace([7, 9], np.nan) - 1

## race/ethnicity RIDRETH1 1 = Non-Hispanic White (code 3), 0 = Other
# The previous version repeated the RIAGENDR 7/9 clean-up here, which was a no-op
# after the shift above. NOTE(review): RIDRETH1 is assumed to carry no
# Refused/Don't Know codes -- confirm against the demographics codebook.
nhanes_merged["RIDRETH1"] = _binary_indicator("RIDRETH1", [3], missing_codes=())

## education DMDEDUC2 0 = Up to GED/High School Diploma, 1 = Higher Education (codes 4 and 5)
nhanes_merged["DMDEDUC2"] = _binary_indicator("DMDEDUC2", [4, 5])

## annual household income INDHHIN2 0 = Under $75,000, 1 = $75,000 or higher
# Code 11 ($75,000 and over) only exists in the 2005-06 coding (INDHHINC); later
# cycles report the same range as 14 ($75,000-$99,999) and 15 ($100,000 and over),
# so all three codes count as high income.
# NOTE(review): the coarse codes 12 (over $20,000) and 13 (under $20,000) stay in
# the 0 group as before -- code 12 is ambiguous, consider treating it as missing.
nhanes_merged["INDHHIN2"] = _binary_indicator("INDHHIN2", [11, 14, 15], missing_codes=(77, 99))

## interest/motivation DPQ010 0 = Less than half the days, 1 = More than half the days (codes 2 and 3)
nhanes_merged["DPQ010"] = _binary_indicator("DPQ010", [2, 3])

## number of hours watched TV or videos a day PAQ710 0 = 2 hours or less, 1 = 3 hours or more (codes 3, 4, 5)
nhanes_merged["PAQ710"] = _binary_indicator("PAQ710", [3, 4, 5], missing_codes=(77, 99))

## number of healthcare visits in past year HUQ051 0 = At Least Once, 1 = None
huq051_answers = nhanes_merged["HUQ051"].replace([77, 99], np.nan)
# "None" is the zero code; np.isclose also matches the 5.397605e-79 placeholder and is False for NaN
no_visits = pd.Series(np.isclose(huq051_answers, 0.0), index=huq051_answers.index).astype(float)
nhanes_merged["HUQ051"] = no_visits.where(huq051_answers.notna())

## home ownership HOQ065 0 = Not Own, 1 = Owned (code 1)
nhanes_merged["HOQ065"] = _binary_indicator("HOQ065", [1])

# continuous converting Refused and Don't Know entries into NaN
## ALQ130
nhanes_merged["ALQ130"] = nhanes_merged["ALQ130"].replace([777, 999], np.nan)
80/20 Split¶
# Hold out 20% of the rows as the test set (fixed seed) and renumber both parts from 0
train_data, test_data = (
    subset.reset_index(drop=True)
    for subset in train_test_split(nhanes_merged, test_size=0.2, random_state=78)
)
Univariate Statistics¶
# Columns summarised with value counts and bar charts (binary/categorical, including the diabetes target)
categorical_nhanes = ["RIAGENDR", "RIDRETH1", "DMDEDUC2", "INDHHIN2", "DPQ010", "PAQ710", "HUQ051", "HOQ065", "SMQ020",
"diabetes", "MCQ300C", "HIQ011", "SLQ050"]
# Columns summarised with describe(), skewness, boxplots and histograms (continuous measurements)
continuous_nhanes = ["RIDAGEYR", "BMXBMI", "ALQ130", "DR1TKCAL", "DR1TCARB", "DR1TSUGR", "DR1TTFAT", "LBXIN", "LBXGLU",
"LBDGLUSI", "LBXBCD", "LBXBPB", "LBXTHG", "URXUAS", "LBXMPAH", "LBXPFDO", "LBXPFNA", "LBXPFHS", "LBXPFDE", "LBXPFUA", "URXBPH", "URXTRS",
"URXBP3", "URXBUP", "URXEPB", "URXMPB", "URXPPB", "URXCNP", "URXCOP", "URXECP", "URXMBP", "URXMC1",
"URXMEP", "URXMHH", "URXMHP", "URXMIB", "URXMNP", "URXMOH", "URXMZP", "URXP01", "URXP02", "URXP03",
"URXP04", "URXP06", "DR1TFIBE", "DR1_320Z"]
# frequency table of every categorical/binary variable in the training split
for cat_name in categorical_nhanes:
    level_counts = train_data[cat_name].value_counts()
    print(level_counts)
RIAGENDR 0.0 4360 1.0 4308 Name: count, dtype: int64 RIDRETH1 0 5514 1 3154 Name: count, dtype: int64 DMDEDUC2 1.0 3108 0.0 2707 Name: count, dtype: int64 INDHHIN2 0.0 7933 1.0 330 Name: count, dtype: int64 DPQ010 0.0 5452 1.0 361 Name: count, dtype: int64 PAQ710 0.0 3534 1.0 2030 Name: count, dtype: int64 HUQ051 0.0 6688 1.0 1973 Name: count, dtype: int64 HOQ065 1.0 4720 0.0 3837 Name: count, dtype: int64 SMQ020 1.0 3584 0.0 2432 Name: count, dtype: int64 diabetes 0 8220 1 448 Name: count, dtype: int64 MCQ300C 1.0 3667 0.0 2034 Name: count, dtype: int64 HIQ011 0.0 6341 1.0 2304 Name: count, dtype: int64 SLQ050 1.0 6029 0.0 1156 Name: count, dtype: int64
# describe() summary (count, mean, std, quartiles, min/max) of every continuous variable
for cont_name in continuous_nhanes:
    summary = train_data[cont_name].describe()
    print(summary)
count 8668.000000 mean 32.762344 std 17.916986 min 12.000000 25% 17.000000 50% 28.000000 75% 44.000000 max 85.000000 Name: RIDAGEYR, dtype: float64 count 8593.000000 mean 26.435478 std 6.515241 min 13.400000 25% 21.700000 50% 25.390000 75% 29.800000 max 68.600000 Name: BMXBMI, dtype: float64 count 3946.000000 mean 3.149265 std 3.096631 min 1.000000 25% 1.000000 50% 2.000000 75% 4.000000 max 82.000000 Name: ALQ130, dtype: float64 count 8212.000000 mean 2165.899903 std 1014.797245 min 93.000000 25% 1472.000000 50% 1994.000000 75% 2666.000000 max 12823.000000 Name: DR1TKCAL, dtype: float64 count 8212.000000 mean 268.898048 std 132.693628 min 0.030000 25% 178.500000 50% 248.210000 75% 332.420000 max 1670.260000 Name: DR1TCARB, dtype: float64 count 8.212000e+03 mean 1.217162e+02 std 8.204480e+01 min 5.397605e-79 25% 6.692000e+01 50% 1.055800e+02 75% 1.569600e+02 max 1.022980e+03 Name: DR1TSUGR, dtype: float64 count 8212.000000 mean 81.405588 std 46.647162 min 0.070000 25% 49.157500 50% 72.990000 75% 103.685000 max 601.330000 Name: DR1TTFAT, dtype: float64 count 8463.000000 mean 12.664783 std 11.481507 min 0.140000 25% 6.090000 50% 9.410000 75% 15.085000 max 136.790000 Name: LBXIN, dtype: float64 count 8668.000000 mean 99.255768 std 23.215104 min 55.000000 25% 90.000000 50% 96.000000 75% 102.000000 max 421.000000 Name: LBXGLU, dtype: float64 count 8668.000000 mean 5.509531 std 1.288871 min 3.053000 25% 4.996000 50% 5.329000 75% 5.662000 max 23.370000 Name: LBDGLUSI, dtype: float64 count 7333.000000 mean 0.430792 std 0.545120 min 0.070000 25% 0.140000 50% 0.250000 75% 0.460000 max 8.800000 Name: LBXBCD, dtype: float64 count 7333.000000 mean 1.325093 std 1.532151 min 0.050000 25% 0.610000 50% 0.950000 75% 1.540000 max 55.200000 Name: LBXBPB, dtype: float64 count 7333.000000 mean 1.277849 std 1.924049 min 0.110000 25% 0.360000 50% 0.680000 75% 1.380000 max 38.500000 Name: LBXTHG, dtype: float64 count 2856.000000 mean 18.592031 std 48.016661 min 0.260000 25% 3.850000 50% 
7.100000 75% 15.250000 max 1195.000000 Name: URXUAS, dtype: float64 count 2834.000000 mean 0.289287 std 0.458981 min 0.060000 25% 0.070000 50% 0.160000 75% 0.300000 max 12.200000 Name: LBXMPAH, dtype: float64 count 2834.000000 mean 0.103617 std 0.074885 min 0.070000 25% 0.070000 50% 0.070000 75% 0.140000 max 2.500000 Name: LBXPFDO, dtype: float64 count 2834.000000 mean 1.079670 std 1.036864 min 0.058000 25% 0.550000 50% 0.820000 75% 1.307500 max 25.748000 Name: LBXPFNA, dtype: float64 count 2834.000000 mean 2.072068 std 2.564160 min 0.070000 25% 0.700000 50% 1.300000 75% 2.400000 max 25.600000 Name: LBXPFHS, dtype: float64 count 2834.000000 mean 0.321161 std 0.562450 min 0.070000 25% 0.140000 50% 0.200000 75% 0.380000 max 17.800000 Name: LBXPFDE, dtype: float64 count 2834.000000 mean 0.205953 std 0.309372 min 0.070000 25% 0.070000 50% 0.140000 75% 0.200000 max 6.300000 Name: LBXPFUA, dtype: float64 count 2805.000000 mean 3.289783 std 19.163532 min 0.140000 25% 0.800000 50% 1.700000 75% 3.100000 max 965.000000 Name: URXBPH, dtype: float64 count 2805.000000 mean 98.679540 std 280.416057 min 1.200000 25% 1.900000 50% 8.000000 75% 40.000000 max 2779.700000 Name: URXTRS, dtype: float64 count 2805.000000 mean 210.584877 std 1135.339327 min 0.280000 25% 4.900000 50% 14.100000 75% 55.200000 max 27200.000000 Name: URXBP3, dtype: float64 count 2805.000000 mean 3.067565 std 16.816298 min 0.070000 25% 0.100000 50% 0.140000 75% 0.400000 max 493.000000 Name: URXBUP, dtype: float64 count 2805.000000 mean 20.726307 std 89.432085 min 0.710000 25% 0.710000 50% 0.710000 75% 5.200000 max 1670.000000 Name: URXEPB, dtype: float64 count 2805.000000 mean 243.073144 std 588.042226 min 0.710000 25% 12.800000 50% 55.000000 75% 222.000000 max 12700.000000 Name: URXMPB, dtype: float64 count 2805.000000 mean 57.291094 std 154.411930 min 0.070000 25% 1.100000 50% 6.700000 75% 42.900000 max 2650.000000 Name: URXPPB, dtype: float64 count 2805.000000 mean 5.498061 std 18.922592 min 0.140000 25% 
1.200000 50% 2.400000 75% 4.620000 max 730.250000 Name: URXCNP, dtype: float64 count 2805.000000 mean 35.131515 std 83.125018 min 0.210000 25% 4.100000 50% 9.500000 75% 25.800000 max 979.800000 Name: URXCOP, dtype: float64 count 2805.000000 mean 49.044674 std 139.269138 min 0.140000 25% 8.200000 50% 17.100000 75% 39.100000 max 3252.400000 Name: URXECP, dtype: float64 count 2805.000000 mean 23.856873 std 31.452609 min 0.280000 25% 7.300000 50% 15.000000 75% 29.200000 max 549.100000 Name: URXMBP, dtype: float64 count 2805.000000 mean 5.810806 std 19.565722 min 0.140000 25% 1.000000 50% 2.160000 75% 4.900000 max 564.000000 Name: URXMC1, dtype: float64 count 2805.000000 mean 228.292381 std 888.435109 min 0.373400 25% 22.000000 50% 57.222000 75% 163.548000 max 31660.000000 Name: URXMEP, dtype: float64 count 2805.000000 mean 35.862503 std 121.366228 min 0.140000 25% 4.500000 50% 11.100000 75% 24.400000 max 2622.400000 Name: URXMHH, dtype: float64 count 2805.000000 mean 4.724720 std 14.180914 min 0.350000 25% 0.780000 50% 1.500000 75% 3.600000 max 296.010000 Name: URXMHP, dtype: float64 count 2805.000000 mean 14.216086 std 21.582126 min 0.140000 25% 4.300000 50% 8.800000 75% 17.500000 max 600.360000 Name: URXMIB, dtype: float64 count 2805.000000 mean 2.438085 std 7.667234 min 0.350000 25% 0.640000 50% 0.871200 75% 1.100000 max 185.190000 Name: URXMNP, dtype: float64 count 2805.000000 mean 21.401697 std 72.577524 min 0.140000 25% 3.030000 50% 7.000000 75% 15.500000 max 1815.700000 Name: URXMOH, dtype: float64 count 2805.000000 mean 14.553603 std 25.535677 min 0.150000 25% 2.900000 50% 7.200000 75% 16.680000 max 432.340000 Name: URXMZP, dtype: float64 count 2.712000e+03 mean 8.922565e+03 std 1.184863e+05 min 3.390000e+01 25% 6.048250e+02 50% 1.420000e+03 75% 4.590900e+03 max 5.259896e+06 Name: URXP01, dtype: float64 count 2734.000000 mean 8378.445830 std 10927.613086 min 107.000000 25% 2099.550000 50% 4388.000000 75% 9993.250000 max 99149.800000 Name: URXP02, dtype: float64 
count 2737.000000 mean 286.377019 std 579.517736 min 5.700000 25% 43.000000 50% 89.000000 75% 220.000000 max 9800.000000 Name: URXP03, dtype: float64 count 2746.00000 mean 515.64177 std 911.35088 min 5.70000 25% 109.85000 50% 217.50000 75% 490.15000 max 11000.00000 Name: URXP04, dtype: float64 count 2756.000000 mean 186.918541 std 247.883314 min 6.400000 25% 64.000000 50% 119.000000 75% 221.000000 max 5864.000000 Name: URXP06, dtype: float64 count 8.212000e+03 mean 1.585567e+01 std 1.013999e+01 min 5.397605e-79 25% 9.000000e+00 50% 1.370000e+01 75% 2.020000e+01 max 1.476000e+02 Name: DR1TFIBE, dtype: float64 count 8.212000e+03 mean 9.473273e+02 std 1.078099e+03 min 5.397605e-79 25% 8.888000e+01 50% 5.925000e+02 75% 1.422000e+03 max 1.344000e+04 Name: DR1_320Z, dtype: float64
# sample skewness of every continuous variable on its original scale
for cont_name in continuous_nhanes:
    raw_skew = train_data[cont_name].skew()
    print(cont_name, raw_skew)
RIDAGEYR 0.8672621568446248 BMXBMI 1.1462643133680315 ALQ130 6.4381175237368335 DR1TKCAL 1.4356755307383746 DR1TCARB 1.607732347529472 DR1TSUGR 2.2577637335569793 DR1TTFAT 1.672442042483449 LBXIN 3.5652171420709124 LBXGLU 6.8029425992903105 LBDGLUSI 6.8038321787185945 LBXBCD 4.5048509935695185 LBXBPB 11.47756096435242 LBXTHG 5.698266037863675 URXUAS 11.727396509827233 LBXMPAH 11.058890832571905 LBXPFDO 15.010882440719659 LBXPFNA 8.517421532102865 LBXPFHS 3.988378698636329 LBXPFDE 15.696840664557849 LBXPFUA 7.702216337139375 URXBPH 45.572501663930346 URXTRS 4.873152170159706 URXBP3 14.199414991670123 URXBUP 15.548290772509839 URXEPB 8.966710675323327 URXMPB 7.909891162993048 URXPPB 6.548659119693288 URXCNP 23.390371231747434 URXCOP 5.599068661042671 URXECP 11.183242086545858 URXMBP 5.954095388975746 URXMC1 15.623827047208819 URXMEP 19.943051710448454 URXMHH 11.931674405382374 URXMHP 10.213954921191291 URXMIB 11.149177383117541 URXMNP 10.48701049415024 URXMOH 13.089600441871058 URXMZP 7.1876636146873825 URXP01 38.41994472859625 URXP02 3.220380836524587 URXP03 5.347160513074789 URXP04 5.0060403203933 URXP06 7.7526891624003165 DR1TFIBE 1.9555736383641864 DR1_320Z 2.034475117803634
# skewness of every continuous variable after a natural-log transformation
for cont_name in continuous_nhanes:
    logged = np.log(train_data[cont_name])
    print(cont_name, logged.skew())
RIDAGEYR 0.11878386900215072 BMXBMI 0.3825506192962267 ALQ130 0.49048167778687485 DR1TKCAL -0.4839475885237581 DR1TCARB -1.203029953989769 DR1TSUGR -75.62626974189726 DR1TTFAT -1.044074825925181 LBXIN 0.12463739036539195 LBXGLU 3.4480394895575137 LBDGLUSI 3.4487586871975653 LBXBCD 0.8418863463720669 LBXBPB 0.42568039164064253 LBXTHG 0.5142849970205279 URXUAS 0.5928336027292153 LBXMPAH 0.6515821205470501 LBXPFDO 1.3490181097460523 LBXPFNA -0.15900846087694975 LBXPFHS -0.22484170406839224 LBXPFDE 0.642921609003418 LBXPFUA 1.252271600441551 URXBPH 0.23254414913906832 URXTRS 0.7550411702003581 URXBP3 0.5685894611836925 URXBUP 1.7159076836901581 URXEPB 1.4328065867310291 URXMPB 0.010052198016668214 URXPPB 0.08541909117296817 URXCNP 0.39736251358159236 URXCOP 0.4020180776696439 URXECP 0.3455273049645498 URXMBP -0.6468760047480185 URXMC1 0.3328804634889151 URXMEP 0.30809018302531327 URXMHH 0.3666079920511518 URXMHP 0.9099345315127817 URXMIB -0.41426540504478615 URXMNP 2.036429286931458 URXMOH 0.3504792899416325 URXMZP -0.2650781562248332 URXP01 0.5576786277571182 URXP02 -0.0209716001833414 URXP03 0.5858877015847936 URXP04 0.42857292174379424 URXP06 0.07125463495949712 DR1TFIBE -33.428860277892866 DR1_320Z -1.2033472414989406
# share of missing entries per column of the training split, in percent
n_train_rows = len(train_data)
train_data.isna().sum().div(n_train_rows).mul(100)
SEQN 0.000000 diabetes 0.000000 LBDGLUSI 0.000000 LBDGLTSI 0.000000 RIAGENDR 0.000000 RIDAGEYR 0.000000 RIDRETH1 0.000000 DMDEDUC2 32.914167 INDHHIN2 4.672358 DR1TKCAL 5.260729 DR1TCARB 5.260729 DR1TSUGR 5.260729 DR1TTFAT 5.260729 DR1TFIBE 5.260729 DR1_320Z 5.260729 BMXBMI 0.865251 LBXBCD 15.401477 LBXBPB 15.401477 LBXTHG 15.401477 URXUAS 67.051223 LBXGLU 0.000000 LBXIN 2.365021 LBXMPAH 67.305030 LBXPFDO 67.305030 LBXPFNA 67.305030 LBXPFHS 67.305030 LBXPFDE 67.305030 LBXPFUA 67.305030 URXP01 68.712506 URXP02 68.458699 URXP03 68.424089 URXP04 68.320258 URXP06 68.204892 URXBPH 67.639594 URXTRS 67.639594 URXBP3 67.639594 URXBUP 67.639594 URXEPB 67.639594 URXMPB 67.639594 URXPPB 67.639594 URXCNP 67.639594 URXCOP 67.639594 URXECP 67.639594 URXMBP 67.639594 URXMC1 67.639594 URXMEP 67.639594 URXMHH 67.639594 URXMHP 67.639594 URXMIB 67.639594 URXMNP 67.639594 URXMOH 67.639594 URXMZP 67.639594 ALQ130 54.476234 PAQ710 35.809875 SMQ020 30.595293 DPQ010 32.937240 SLQ050 17.108906 MCQ300C 34.229349 HUQ051 0.080757 HIQ011 0.265344 HOQ065 1.280572 dtype: float64
# missing data percentage per column (count of NaN entries relative to the number of training rows)
missing_counts = train_data.isna().sum()
missing_counts / len(train_data) * 100
SEQN 0.000000 diabetes 0.000000 LBDGLUSI 0.000000 LBDGLTSI 0.000000 RIAGENDR 0.000000 RIDAGEYR 0.000000 RIDRETH1 0.000000 DMDEDUC2 32.914167 INDHHIN2 4.672358 DR1TKCAL 5.260729 DR1TCARB 5.260729 DR1TSUGR 5.260729 DR1TTFAT 5.260729 DR1TFIBE 5.260729 DR1_320Z 5.260729 BMXBMI 0.865251 LBXBCD 15.401477 LBXBPB 15.401477 LBXTHG 15.401477 URXUAS 67.051223 LBXGLU 0.000000 LBXIN 2.365021 LBXMPAH 67.305030 LBXPFDO 67.305030 LBXPFNA 67.305030 LBXPFHS 67.305030 LBXPFDE 67.305030 LBXPFUA 67.305030 URXP01 68.712506 URXP02 68.458699 URXP03 68.424089 URXP04 68.320258 URXP06 68.204892 URXBPH 67.639594 URXTRS 67.639594 URXBP3 67.639594 URXBUP 67.639594 URXEPB 67.639594 URXMPB 67.639594 URXPPB 67.639594 URXCNP 67.639594 URXCOP 67.639594 URXECP 67.639594 URXMBP 67.639594 URXMC1 67.639594 URXMEP 67.639594 URXMHH 67.639594 URXMHP 67.639594 URXMIB 67.639594 URXMNP 67.639594 URXMOH 67.639594 URXMZP 67.639594 ALQ130 54.476234 PAQ710 35.809875 SMQ020 30.595293 DPQ010 32.937240 SLQ050 17.108906 MCQ300C 34.229349 HUQ051 0.080757 HIQ011 0.265344 HOQ065 1.280572 dtype: float64
Univariate Visualizations¶
# boxplots for continuous variables
for col in continuous_nhanes:
    # boxplot cannot handle NaN entries, so they are dropped first
    plt.boxplot(train_data[col].dropna())
    # the y axis of a boxplot shows the variable's values, not a frequency
    plt.ylabel('Value')
    plt.xlabel(col)
    plt.show()
# one histogram per continuous variable of the training split
for cont_name in continuous_nhanes:
    _, hist_ax = plt.subplots()
    hist_ax.hist(train_data[cont_name], color='skyblue', edgecolor='black')
    hist_ax.set_xlabel(cont_name)
    hist_ax.set_ylabel('Frequency')
    plt.show()
# one bar chart of level counts per categorical variable of the training split
for cat_name in categorical_nhanes:
    level_counts = train_data[cat_name].value_counts()
    bar_ax = level_counts.plot(kind='bar')
    bar_ax.set_xlabel(cat_name)
    bar_ax.set_ylabel("Frequency")
    plt.show()
Discussion for Univariate Visualizations¶
All of the continuous variables seem to be right-skewed with many outliers. When looking at the outliers in the boxplots and comparing them to the histograms, they seem to make sense: the bulk of the data has lower values, and the number of individuals falls off as the values get larger. Applying a log transformation to all of the continuous variables (all of which are skewed) decreases the skewness for most of them. The exceptions are DR1TSUGR and DR1TFIBE, where the skew goes from a small positive value (about 2.26 and 1.96) to a large negative value (about -75.63 and -33.43) after the log transformation.
Log Transformations¶
# Log-transform the continuous variables in both splits, except those whose skew
# gets worse on the log scale. DR1TFIBE and DR1TSUGR contain the near-zero
# placeholder 5.397605e-79 (their describe() minimum); np.log turns it into roughly
# -181, which produced log-scale skews of about -33.4 and -75.6 respectively.
# NOTE(review): DR1_320Z has the same placeholder minimum but its skew still shrinks
# in magnitude (2.03 -> -1.20), so it stays in; np.log1p may be a better fit there.
_log_excluded = {"DR1TFIBE", "DR1TSUGR"}
# Derived from continuous_nhanes so the two lists cannot drift apart (order is preserved)
continuous_nhanes_log = [col for col in continuous_nhanes if col not in _log_excluded]
train_data[continuous_nhanes_log] = np.log(train_data[continuous_nhanes_log])
test_data[continuous_nhanes_log] = np.log(test_data[continuous_nhanes_log])
Imputation¶
# mode
def impute_mode(df, column_name):
    """Fill the missing entries of df[column_name] with the column's mode.

    The column is replaced on df itself and df is returned, so both
    `impute_mode(df, col)` and `df = impute_mode(df, col)` work. When several
    values tie for the mode, the smallest one is used. A column without any
    non-missing value has no mode and is left unchanged.
    """
    modes = df[column_name].mode()
    if modes.empty:
        # nothing to impute from (column is entirely NaN)
        return df
    # Assign the filled column back instead of fillna(inplace=True) on the
    # selected column, which may operate on a temporary copy and change nothing.
    df[column_name] = df[column_name].fillna(modes.iloc[0])
    return df
# median
def impute_median(df, column_name):
    """Fill the missing entries of df[column_name] with the column's median.

    The column is replaced on df itself and df is returned, so both
    `impute_median(df, col)` and `df = impute_median(df, col)` work. The
    median ignores missing entries; for a column that is entirely NaN the
    median is NaN and the column stays unchanged.
    """
    median_value = df[column_name].median()
    # Assign the filled column back instead of fillna(inplace=True) on the
    # selected column, which may operate on a temporary copy and change nothing.
    df[column_name] = df[column_name].fillna(median_value)
    return df
# Initialize MICE imputer; it is re-fitted on the training rows of each variable group below
imputer = IterativeImputer(max_iter=10, random_state=78)

# Every fill value and every MICE model is learned from train_data only and then
# applied unchanged to test_data. Previously the test split was imputed with modes,
# medians and MICE models fitted on the test split itself, so the two splits went
# through different preprocessing.

# Mode -- binary/categorical variables:
# demographics (education, income), trouble sleeping, and the behavioral variables.
# SLQ050 is a 0/1 variable, so it is filled with the mode rather than the median
# (a median could land on 0.5 when the two levels tie).
mode_columns = ["DMDEDUC2", "INDHHIN2", "SLQ050",
                "HUQ051", "HIQ011", "HOQ065", "MCQ300C", "PAQ710", "SMQ020", "DPQ010"]
for col in mode_columns:
    train_mode = train_data[col].mode()[0]
    train_data[col] = train_data[col].fillna(train_mode)
    test_data[col] = test_data[col].fillna(train_mode)

# Median -- continuous variables imputed one column at a time:
# BMI, alcohol, urinary arsenic (URXUAS) and LBXIN
# (LBXIN is insulin per the NHANES codebook; the earlier comment called it glucose)
median_columns = ["BMXBMI", "ALQ130", "URXUAS", "LBXIN"]
for col in median_columns:
    train_median = train_data[col].median()
    train_data[col] = train_data[col].fillna(train_median)
    test_data[col] = test_data[col].fillna(train_median)

# Multivariate Imputation by Chained Equations (MICE) -- one model per variable group,
# each using only the columns of its own group as predictors
mice_groups = {
    # Dietary Variables (Calories, Carbs, Sugar, Fat, Fiber, Water)
    "dietary": ["DR1TKCAL", "DR1TCARB", "DR1TSUGR", "DR1TTFAT", "DR1TFIBE", "DR1_320Z"],
    # Heavy Metals (Cadmium, Lead, Mercury); Arsenic is median-imputed above
    "heavy metals": ["LBXBCD", "LBXBPB", "LBXTHG"],
    # PFAs
    "PFAs": ["LBXMPAH", "LBXPFDO", "LBXPFNA", "LBXPFHS", "LBXPFDE", "LBXPFUA"],
    # PAHs
    "PAHs": ["URXP01", "URXP02", "URXP03", "URXP04", "URXP06"],
    # Environmental phenols/parabens (the earlier comment called this group "phthalates")
    "environmental phenols": ["URXBPH", "URXTRS", "URXBP3", "URXBUP", "URXEPB", "URXMPB", "URXPPB"],
    # Phthalates
    "phthalates": ["URXCNP", "URXCOP", "URXECP", "URXMBP", "URXMC1", "URXMEP",
                   "URXMHH", "URXMHP", "URXMIB", "URXMNP", "URXMOH", "URXMZP"],
}
for group_columns in mice_groups.values():
    # fit on the training rows, then reuse the fitted model for the test rows
    # before the imputer is re-fitted for the next group
    train_data[group_columns] = imputer.fit_transform(train_data[group_columns])
    test_data[group_columns] = imputer.transform(test_data[group_columns])
Exploratory Data Analysis and Bivariate Analysis¶
# correlations
# Pairwise correlation matrix of the continuous predictors plus the diabetes target,
# computed on the training split only.
# .copy() makes numerical_nhanes an independent frame, so adding the "diabetes" column
# below is not a write onto a slice of train_data (the SettingWithCopyWarning pattern).
numerical_nhanes = train_data[continuous_nhanes].copy()
# errors="coerce": any diabetes value that cannot be parsed as a number becomes NaN.
numerical_nhanes["diabetes"] = pd.to_numeric(train_data["diabetes"], errors="coerce")
# Keep only the first occurrence of any repeated column name before correlating.
corr = numerical_nhanes.loc[:, ~numerical_nhanes.columns.duplicated()].corr()
# Last expression of the cell: the colour-graded table is what the notebook displays.
# (The former bare `corr.head(100)` line was discarded without effect and is removed.)
corr.style.background_gradient(cmap='coolwarm')
| Ā | RIDAGEYR | BMXBMI | ALQ130 | DR1TKCAL | DR1TCARB | DR1TSUGR | DR1TTFAT | LBXIN | LBXGLU | LBDGLUSI | LBXBCD | LBXBPB | LBXTHG | URXUAS | LBXMPAH | LBXPFDO | LBXPFNA | LBXPFHS | LBXPFDE | LBXPFUA | URXBPH | URXTRS | URXBP3 | URXBUP | URXEPB | URXMPB | URXPPB | URXCNP | URXCOP | URXECP | URXMBP | URXMC1 | URXMEP | URXMHH | URXMHP | URXMIB | URXMNP | URXMOH | URXMZP | URXP01 | URXP02 | URXP03 | URXP04 | URXP06 | DR1TFIBE | DR1_320Z | diabetes |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| RIDAGEYR | 1.000000 | 0.276835 | -0.049195 | -0.022535 | -0.063251 | -0.025177 | -0.027491 | -0.177180 | 0.275097 | 0.275131 | 0.359195 | 0.415399 | 0.280666 | 0.078363 | 0.008716 | -0.000235 | 0.071768 | 0.038425 | 0.104914 | 0.114822 | -0.052036 | 0.007028 | -0.007265 | 0.000539 | 0.050070 | 0.017795 | 0.002019 | -0.019836 | -0.039036 | -0.072532 | -0.077049 | -0.052441 | 0.008772 | -0.071952 | -0.057099 | -0.078938 | -0.037882 | -0.084542 | -0.127855 | 0.089793 | 0.011790 | 0.020125 | 0.034065 | 0.024007 | 0.110284 | 0.021556 | 0.231925 |
| BMXBMI | 0.276835 | 1.000000 | 0.070655 | -0.010689 | -0.039629 | -0.002268 | 0.004131 | 0.465490 | 0.226896 | 0.226870 | 0.024432 | 0.015792 | 0.031764 | 0.016020 | -0.057258 | -0.028180 | -0.022313 | -0.022017 | -0.034604 | -0.050581 | 0.017345 | 0.006948 | -0.011590 | -0.027212 | -0.015703 | 0.013729 | 0.004688 | 0.054427 | 0.046988 | 0.024656 | 0.016557 | 0.022802 | 0.072135 | 0.023753 | 0.011157 | 0.043477 | 0.000334 | 0.020893 | 0.018343 | 0.000241 | 0.096730 | 0.000286 | 0.045689 | 0.057947 | -0.003748 | 0.024218 | 0.144196 |
| ALQ130 | -0.049195 | 0.070655 | 1.000000 | 0.122885 | 0.070473 | 0.010235 | 0.062827 | 0.007606 | 0.056448 | 0.056438 | 0.097814 | 0.095320 | -0.041142 | 0.020626 | -0.034401 | -0.013071 | -0.011042 | 0.016803 | -0.025146 | -0.034601 | 0.054806 | -0.012328 | -0.047695 | -0.027835 | 0.015435 | -0.022472 | -0.033499 | 0.016736 | 0.010988 | 0.017814 | 0.030500 | 0.031569 | 0.025856 | 0.025792 | 0.028521 | 0.025864 | 0.023444 | 0.019869 | 0.040268 | 0.087382 | 0.103168 | 0.126131 | 0.118798 | 0.079881 | -0.015875 | -0.043760 | 0.020119 |
| DR1TKCAL | -0.022535 | -0.010689 | 0.122885 | 1.000000 | 0.877693 | 0.258928 | 0.872380 | 0.000511 | 0.007269 | 0.007290 | -0.004915 | 0.034032 | 0.002365 | 0.011704 | 0.015039 | 0.002527 | 0.022639 | 0.048849 | 0.010015 | -0.020095 | 0.010087 | 0.019699 | -0.018234 | -0.038261 | -0.014865 | -0.039840 | -0.055292 | 0.040792 | 0.034353 | 0.054915 | -0.010179 | 0.051504 | 0.002181 | 0.061417 | 0.045509 | -0.021754 | 0.037501 | 0.054698 | 0.012892 | 0.029931 | -0.011944 | 0.034726 | 0.028441 | 0.016261 | 0.534127 | -0.043463 | -0.027442 |
| DR1TCARB | -0.063251 | -0.039629 | 0.070473 | 0.877693 | 1.000000 | 0.450036 | 0.633181 | 0.029471 | -0.000078 | -0.000066 | -0.033193 | 0.001565 | -0.044464 | -0.015182 | 0.027896 | 0.001369 | 0.012000 | 0.036902 | -0.013819 | -0.035910 | 0.011760 | 0.015637 | -0.025200 | -0.027503 | -0.038929 | -0.037678 | -0.052249 | 0.033731 | 0.019189 | 0.053835 | -0.007952 | 0.040538 | 0.006686 | 0.051327 | 0.030530 | -0.023047 | 0.016704 | 0.048178 | 0.014769 | 0.014994 | -0.020790 | 0.011470 | 0.009867 | 0.005457 | 0.550612 | -0.064822 | -0.030970 |
| DR1TSUGR | -0.025177 | -0.002268 | 0.010235 | 0.258928 | 0.450036 | 1.000000 | 0.175195 | 0.025237 | 0.008435 | 0.008442 | -0.021060 | -0.013685 | -0.025431 | -0.010501 | 0.015692 | 0.006104 | 0.004378 | 0.013645 | -0.006975 | -0.014593 | 0.014659 | 0.004016 | -0.007062 | -0.001962 | -0.014748 | -0.002526 | -0.007367 | 0.017578 | 0.008009 | 0.024111 | 0.010608 | 0.017884 | 0.009579 | 0.022351 | 0.013380 | 0.001655 | 0.004944 | 0.022366 | 0.014281 | 0.011504 | 0.000098 | 0.009283 | 0.009833 | 0.010220 | 0.118924 | -0.048236 | -0.011102 |
| DR1TTFAT | -0.027491 | 0.004131 | 0.062827 | 0.872380 | 0.633181 | 0.175195 | 1.000000 | 0.019355 | 0.007106 | 0.007119 | -0.035198 | -0.010152 | -0.013558 | -0.006299 | -0.000635 | 0.000724 | 0.003414 | 0.037473 | 0.001049 | -0.035104 | 0.007298 | 0.017551 | -0.006594 | -0.032553 | -0.018610 | -0.023084 | -0.031622 | 0.045956 | 0.042326 | 0.054523 | -0.006636 | 0.054428 | -0.002662 | 0.062336 | 0.048650 | -0.014188 | 0.040118 | 0.059006 | 0.017889 | 0.007935 | -0.015958 | 0.014238 | 0.009136 | 0.003620 | 0.418043 | -0.011403 | -0.023162 |
| LBXIN | -0.177180 | 0.465490 | 0.007606 | 0.000511 | 0.029471 | 0.025237 | 0.019355 | 1.000000 | 0.211596 | 0.211561 | -0.151057 | -0.138398 | -0.112682 | -0.021270 | -0.052404 | -0.053862 | -0.037252 | -0.038886 | -0.086738 | -0.106293 | 0.019975 | -0.011724 | -0.034506 | -0.032427 | -0.063172 | -0.006174 | -0.010344 | 0.032946 | 0.064482 | 0.061831 | 0.049967 | 0.052259 | 0.037279 | 0.062876 | 0.031826 | 0.079954 | 0.040170 | 0.062996 | 0.063261 | -0.047075 | 0.045784 | -0.041164 | -0.006053 | 0.017406 | -0.030956 | -0.027378 | 0.124081 |
| LBXGLU | 0.275097 | 0.226896 | 0.056448 | 0.007269 | -0.000078 | 0.008435 | 0.007106 | 0.211596 | 1.000000 | 0.999994 | 0.050378 | 0.144172 | 0.033099 | 0.024445 | -0.011941 | -0.006374 | 0.011420 | 0.015452 | 0.024183 | 0.002555 | -0.036025 | -0.022449 | -0.068459 | -0.047853 | -0.060605 | -0.046914 | -0.066700 | -0.009345 | -0.000618 | -0.004863 | -0.012439 | -0.004157 | 0.004544 | -0.004699 | -0.012199 | 0.007175 | -0.002584 | -0.010806 | -0.041545 | 0.017032 | 0.034159 | -0.006506 | 0.011443 | 0.003245 | 0.032780 | 0.008299 | 0.652247 |
| LBDGLUSI | 0.275131 | 0.226870 | 0.056438 | 0.007290 | -0.000066 | 0.008442 | 0.007119 | 0.211561 | 0.999994 | 1.000000 | 0.050424 | 0.144230 | 0.033159 | 0.024522 | -0.011908 | -0.006326 | 0.011426 | 0.015497 | 0.024244 | 0.002633 | -0.036008 | -0.022485 | -0.068446 | -0.047854 | -0.060634 | -0.046903 | -0.066726 | -0.009302 | -0.000629 | -0.004815 | -0.012410 | -0.004097 | 0.004545 | -0.004660 | -0.012172 | 0.007154 | -0.002558 | -0.010765 | -0.041558 | 0.017038 | 0.034114 | -0.006522 | 0.011431 | 0.003207 | 0.032748 | 0.008312 | 0.652336 |
| LBXBCD | 0.359195 | 0.024432 | 0.097814 | -0.004915 | -0.033193 | -0.021060 | -0.035198 | -0.151057 | 0.050378 | 0.050424 | 1.000000 | 0.385347 | 0.127550 | 0.056038 | 0.042105 | 0.016960 | 0.021396 | -0.010610 | 0.035651 | 0.059691 | -0.004681 | -0.026339 | -0.035683 | 0.004770 | 0.051736 | 0.003359 | 0.000491 | -0.021199 | -0.039583 | -0.024228 | 0.008434 | -0.018317 | 0.004204 | -0.020392 | -0.003666 | -0.018351 | -0.017925 | -0.024708 | -0.021559 | 0.283455 | 0.170835 | 0.287831 | 0.265966 | 0.109110 | -0.047857 | -0.081805 | 0.068836 |
| LBXBPB | 0.415399 | 0.015792 | 0.095320 | 0.034032 | 0.001565 | -0.013685 | -0.010152 | -0.138398 | 0.144172 | 0.144230 | 0.385347 | 1.000000 | 0.199550 | 0.107696 | 0.076846 | 0.092868 | 0.091027 | 0.052800 | 0.099836 | 0.102385 | -0.001403 | -0.029658 | -0.079913 | -0.031903 | -0.009271 | -0.045575 | -0.073508 | -0.019247 | -0.077987 | 0.004016 | 0.004545 | -0.031711 | 0.022656 | 0.005668 | -0.005111 | -0.036558 | -0.044696 | -0.005132 | -0.033446 | 0.168714 | 0.008869 | 0.121121 | 0.118584 | 0.074794 | 0.040778 | -0.053055 | 0.081720 |
| LBXTHG | 0.280666 | 0.031764 | -0.041142 | 0.002365 | -0.044464 | -0.025431 | -0.013558 | -0.112682 | 0.033099 | 0.033159 | 0.127550 | 0.199550 | 1.000000 | 0.300720 | -0.008065 | 0.042309 | 0.089770 | 0.001277 | 0.151698 | 0.228828 | -0.040945 | 0.043969 | 0.056446 | 0.052046 | 0.059693 | 0.024701 | 0.013835 | -0.013142 | -0.012859 | -0.023710 | -0.035459 | -0.025321 | -0.006683 | -0.021957 | -0.000913 | -0.027209 | -0.008042 | -0.026370 | -0.064066 | 0.003810 | -0.072821 | -0.043994 | -0.040516 | -0.017936 | 0.073392 | 0.081617 | 0.029791 |
| URXUAS | 0.078363 | 0.016020 | 0.020626 | 0.011704 | -0.015182 | -0.010501 | -0.006299 | -0.021270 | 0.024445 | 0.024522 | 0.056038 | 0.107696 | 0.300720 | 1.000000 | 0.007184 | 0.031487 | 0.049663 | -0.002054 | 0.072972 | 0.098317 | 0.036320 | 0.026710 | 0.028407 | 0.019473 | 0.040041 | 0.040130 | 0.024067 | 0.034547 | 0.046197 | 0.039519 | 0.057045 | 0.055757 | 0.021110 | 0.036763 | 0.056583 | 0.051446 | 0.051441 | 0.036990 | 0.031205 | 0.092305 | 0.105401 | 0.098307 | 0.112249 | 0.144847 | -0.005508 | 0.006512 | 0.018170 |
| LBXMPAH | 0.008716 | -0.057258 | -0.034401 | 0.015039 | 0.027896 | 0.015692 | -0.000635 | -0.052404 | -0.011941 | -0.011908 | 0.042105 | 0.076846 | -0.008065 | 0.007184 | 1.000000 | 0.367104 | 0.273211 | 0.264158 | 0.262634 | 0.171176 | 0.075008 | 0.051121 | -0.011414 | 0.080892 | 0.007470 | 0.007771 | 0.001948 | 0.034075 | -0.046931 | 0.117080 | 0.094307 | 0.057998 | 0.086494 | 0.110962 | 0.059949 | -0.003407 | -0.000381 | 0.109072 | 0.078877 | 0.016038 | -0.017730 | 0.001807 | -0.002726 | -0.006888 | -0.022200 | -0.037555 | -0.005594 |
| LBXPFDO | -0.000235 | -0.028180 | -0.013071 | 0.002527 | 0.001369 | 0.006104 | 0.000724 | -0.053862 | -0.006374 | -0.006326 | 0.016960 | 0.092868 | 0.042309 | 0.031487 | 0.367104 | 1.000000 | 0.240295 | 0.159096 | 0.366735 | 0.361501 | 0.025458 | 0.051817 | -0.015623 | 0.081563 | -0.011895 | 0.029106 | 0.018965 | -0.003502 | -0.063741 | 0.082549 | 0.041606 | 0.001521 | 0.047163 | 0.080600 | 0.038000 | -0.037051 | -0.019794 | 0.079758 | 0.014725 | 0.018443 | -0.013376 | 0.008766 | 0.000830 | -0.001308 | -0.025001 | -0.025179 | -0.016019 |
| LBXPFNA | 0.071768 | -0.022313 | -0.011042 | 0.022639 | 0.012000 | 0.004378 | 0.003414 | -0.037252 | 0.011420 | 0.011426 | 0.021396 | 0.091027 | 0.089770 | 0.049663 | 0.273211 | 0.240295 | 1.000000 | 0.431411 | 0.711883 | 0.574719 | 0.035871 | 0.048710 | 0.006984 | 0.088406 | 0.004547 | 0.036437 | 0.007447 | 0.014503 | -0.029907 | 0.075620 | 0.031733 | 0.036177 | 0.066441 | 0.077031 | 0.044171 | -0.042825 | -0.016515 | 0.070293 | -0.005844 | -0.007564 | -0.024268 | -0.013178 | -0.011752 | -0.021259 | -0.012025 | -0.027074 | 0.006191 |
| LBXPFHS | 0.038425 | -0.022017 | 0.016803 | 0.048849 | 0.036902 | 0.013645 | 0.037473 | -0.038886 | 0.015452 | 0.015497 | -0.010610 | 0.052800 | 0.001277 | -0.002054 | 0.264158 | 0.159096 | 0.431411 | 1.000000 | 0.287648 | 0.165070 | 0.057208 | 0.024684 | 0.006031 | 0.008788 | -0.019081 | -0.027768 | -0.053234 | 0.055550 | 0.004775 | 0.048265 | 0.017592 | 0.036828 | 0.041176 | 0.053880 | 0.032293 | -0.032771 | 0.016587 | 0.045418 | 0.004436 | 0.014354 | -0.000354 | 0.010638 | 0.007438 | 0.000595 | -0.002336 | -0.023900 | 0.005793 |
| LBXPFDE | 0.104914 | -0.034604 | -0.025146 | 0.010015 | -0.013819 | -0.006975 | 0.001049 | -0.086738 | 0.024183 | 0.024244 | 0.035651 | 0.099836 | 0.151698 | 0.072972 | 0.262634 | 0.366735 | 0.711883 | 0.287648 | 1.000000 | 0.754365 | -0.007550 | 0.044202 | 0.007670 | 0.082564 | 0.016857 | 0.027077 | 0.002849 | -0.027274 | -0.059262 | 0.025619 | 0.004139 | -0.007652 | 0.035889 | 0.033574 | 0.023473 | -0.058854 | -0.015399 | 0.025236 | -0.040253 | -0.005773 | -0.033340 | -0.018106 | -0.018499 | -0.022704 | 0.001055 | -0.004412 | 0.022107 |
| LBXPFUA | 0.114822 | -0.050581 | -0.034601 | -0.020095 | -0.035910 | -0.014593 | -0.035104 | -0.106293 | 0.002555 | 0.002633 | 0.059691 | 0.102385 | 0.228828 | 0.098317 | 0.171176 | 0.361501 | 0.574719 | 0.165070 | 0.754365 | 1.000000 | -0.041593 | 0.049573 | 0.000575 | 0.089885 | 0.033351 | 0.036968 | 0.011258 | -0.060819 | -0.075107 | -0.005569 | -0.014625 | -0.030603 | -0.002710 | -0.000166 | 0.013113 | -0.069707 | -0.029655 | -0.006340 | -0.066336 | -0.007065 | -0.043761 | -0.018591 | -0.019319 | -0.023169 | -0.000267 | 0.008976 | 0.016343 |
| URXBPH | -0.052036 | 0.017345 | 0.054806 | 0.010087 | 0.011760 | 0.014659 | 0.007298 | 0.019975 | -0.036025 | -0.036008 | -0.004681 | -0.001403 | -0.040945 | 0.036320 | 0.075008 | 0.025458 | 0.035871 | 0.057208 | -0.007550 | -0.041593 | 1.000000 | 0.147183 | 0.124354 | 0.147506 | 0.140161 | 0.227003 | 0.198849 | 0.349095 | 0.267763 | 0.463889 | 0.507276 | 0.409479 | 0.367465 | 0.481790 | 0.400325 | 0.425609 | 0.185482 | 0.490261 | 0.480276 | 0.208404 | 0.233228 | 0.246652 | 0.276966 | 0.292380 | -0.053165 | -0.071083 | -0.018315 |
| URXTRS | 0.007028 | 0.006948 | -0.012328 | 0.019699 | 0.015637 | 0.004016 | 0.017551 | -0.011724 | -0.022449 | -0.022485 | -0.026339 | -0.029658 | 0.043969 | 0.026710 | 0.051121 | 0.051817 | 0.048710 | 0.024684 | 0.044202 | 0.049573 | 0.147183 | 1.000000 | 0.147692 | 0.176276 | 0.142249 | 0.166520 | 0.154998 | 0.134445 | 0.089091 | 0.209084 | 0.170628 | 0.133347 | 0.200237 | 0.194528 | 0.167909 | 0.105607 | 0.079227 | 0.195391 | 0.115123 | 0.019495 | 0.028982 | 0.014788 | 0.032948 | 0.088000 | 0.013946 | 0.025922 | -0.004257 |
| URXBP3 | -0.007265 | -0.011590 | -0.047695 | -0.018234 | -0.025200 | -0.007062 | -0.006594 | -0.034506 | -0.068459 | -0.068446 | -0.035683 | -0.079913 | 0.056446 | 0.028407 | -0.011414 | -0.015623 | 0.006984 | 0.006031 | 0.007670 | 0.000575 | 0.124354 | 0.147692 | 1.000000 | 0.216625 | 0.259654 | 0.277838 | 0.298543 | 0.173518 | 0.187773 | 0.158292 | 0.181523 | 0.191800 | 0.113358 | 0.139980 | 0.112130 | 0.149969 | 0.101400 | 0.145904 | 0.115187 | -0.003789 | 0.040930 | -0.017669 | 0.008097 | 0.096956 | 0.015527 | 0.048153 | -0.033809 |
| URXBUP | 0.000539 | -0.027212 | -0.027835 | -0.038261 | -0.027503 | -0.001962 | -0.032553 | -0.032427 | -0.047853 | -0.047854 | 0.004770 | -0.031903 | 0.052046 | 0.019473 | 0.080892 | 0.081563 | 0.088406 | 0.008788 | 0.082564 | 0.089885 | 0.147506 | 0.176276 | 0.216625 | 1.000000 | 0.420127 | 0.418030 | 0.445810 | 0.092791 | -0.010443 | 0.219584 | 0.196666 | 0.135580 | 0.223052 | 0.217083 | 0.180310 | 0.074325 | 0.043175 | 0.214734 | 0.104504 | 0.031885 | 0.020255 | 0.008582 | 0.034143 | 0.083219 | -0.013645 | 0.012961 | -0.003387 |
| URXEPB | 0.050070 | -0.015703 | 0.015435 | -0.014865 | -0.038929 | -0.014748 | -0.018610 | -0.063172 | -0.060605 | -0.060634 | 0.051736 | -0.009271 | 0.059693 | 0.040041 | 0.007470 | -0.011895 | 0.004547 | -0.019081 | 0.016857 | 0.033351 | 0.140161 | 0.142249 | 0.259654 | 0.420127 | 1.000000 | 0.487131 | 0.481681 | 0.112136 | 0.120277 | 0.109904 | 0.172112 | 0.130442 | 0.176011 | 0.117050 | 0.138134 | 0.154251 | 0.099813 | 0.110185 | 0.091529 | 0.089756 | 0.094462 | 0.085148 | 0.093562 | 0.123945 | -0.012617 | 0.012675 | -0.026673 |
| URXMPB | 0.017795 | 0.013729 | -0.022472 | -0.039840 | -0.037678 | -0.002526 | -0.023084 | -0.006174 | -0.046914 | -0.046903 | 0.003359 | -0.045575 | 0.024701 | 0.040130 | 0.007771 | 0.029106 | 0.036437 | -0.027768 | 0.027077 | 0.036968 | 0.227003 | 0.166520 | 0.277838 | 0.418030 | 0.487131 | 1.000000 | 0.827147 | 0.172665 | 0.127276 | 0.234936 | 0.308348 | 0.193652 | 0.352957 | 0.238077 | 0.213889 | 0.261624 | 0.103442 | 0.239427 | 0.211237 | 0.077430 | 0.131640 | 0.072077 | 0.094411 | 0.123903 | -0.020207 | 0.004155 | -0.014588 |
| URXPPB | 0.002019 | 0.004688 | -0.033499 | -0.055292 | -0.052249 | -0.007367 | -0.031622 | -0.010344 | -0.066700 | -0.066726 | 0.000491 | -0.073508 | 0.013835 | 0.024067 | 0.001948 | 0.018965 | 0.007447 | -0.053234 | 0.002849 | 0.011258 | 0.198849 | 0.154998 | 0.298543 | 0.445810 | 0.481681 | 0.827147 | 1.000000 | 0.140762 | 0.108131 | 0.193792 | 0.274269 | 0.166348 | 0.320985 | 0.203812 | 0.203874 | 0.219969 | 0.098657 | 0.206300 | 0.194011 | 0.041557 | 0.109976 | 0.047167 | 0.069413 | 0.106926 | -0.034523 | 0.007054 | -0.025858 |
| URXCNP | -0.019836 | 0.054427 | 0.016736 | 0.040792 | 0.033731 | 0.017578 | 0.045956 | 0.032946 | -0.009345 | -0.009302 | -0.021199 | -0.019247 | -0.013142 | 0.034547 | 0.034075 | -0.003502 | 0.014503 | 0.055550 | -0.027274 | -0.060819 | 0.349095 | 0.134445 | 0.173518 | 0.092791 | 0.112136 | 0.172665 | 0.140762 | 1.000000 | 0.618849 | 0.483670 | 0.367830 | 0.635708 | 0.236778 | 0.435868 | 0.359436 | 0.335661 | 0.435917 | 0.441603 | 0.327628 | 0.161853 | 0.178888 | 0.166450 | 0.188201 | 0.239195 | -0.011106 | -0.024189 | 0.003673 |
| URXCOP | -0.039036 | 0.046988 | 0.010988 | 0.034353 | 0.019189 | 0.008009 | 0.042326 | 0.064482 | -0.000618 | -0.000629 | -0.039583 | -0.077987 | -0.012859 | 0.046197 | -0.046931 | -0.063741 | -0.029907 | 0.004775 | -0.059262 | -0.075107 | 0.267763 | 0.089091 | 0.187773 | -0.010443 | 0.120277 | 0.127276 | 0.108131 | 0.618849 | 1.000000 | 0.351404 | 0.260520 | 0.732770 | 0.112946 | 0.313209 | 0.282293 | 0.368948 | 0.669951 | 0.324965 | 0.257313 | 0.080142 | 0.159030 | 0.102285 | 0.116859 | 0.166852 | -0.004938 | -0.008356 | -0.000488 |
| URXECP | -0.072532 | 0.024656 | 0.017814 | 0.054915 | 0.053835 | 0.024111 | 0.054523 | 0.061831 | -0.004863 | -0.004815 | -0.024228 | 0.004016 | -0.023710 | 0.039519 | 0.117080 | 0.082549 | 0.075620 | 0.048265 | 0.025619 | -0.005569 | 0.463889 | 0.209084 | 0.158292 | 0.219584 | 0.109904 | 0.234936 | 0.193792 | 0.483670 | 0.351404 | 1.000000 | 0.601894 | 0.571587 | 0.409682 | 0.954633 | 0.785646 | 0.427790 | 0.308156 | 0.959086 | 0.522091 | 0.176638 | 0.171112 | 0.184258 | 0.219289 | 0.274339 | -0.012043 | -0.039747 | 0.005039 |
| URXMBP | -0.077049 | 0.016557 | 0.030500 | -0.010179 | -0.007952 | 0.010608 | -0.006636 | 0.049967 | -0.012439 | -0.012410 | 0.008434 | 0.004545 | -0.035459 | 0.057045 | 0.094307 | 0.041606 | 0.031733 | 0.017592 | 0.004139 | -0.014625 | 0.507276 | 0.170628 | 0.181523 | 0.196666 | 0.172112 | 0.308348 | 0.274269 | 0.367830 | 0.260520 | 0.601894 | 1.000000 | 0.508607 | 0.456371 | 0.631331 | 0.501003 | 0.704682 | 0.221166 | 0.642819 | 0.713835 | 0.228500 | 0.279663 | 0.268746 | 0.306498 | 0.349468 | -0.058330 | -0.056617 | -0.004023 |
| URXMC1 | -0.052441 | 0.022802 | 0.031569 | 0.051504 | 0.040538 | 0.017884 | 0.054428 | 0.052259 | -0.004157 | -0.004097 | -0.018317 | -0.031711 | -0.025321 | 0.055757 | 0.057998 | 0.001521 | 0.036177 | 0.036828 | -0.007652 | -0.030603 | 0.409479 | 0.133347 | 0.191800 | 0.135580 | 0.130442 | 0.193652 | 0.166348 | 0.635708 | 0.732770 | 0.571587 | 0.508607 | 1.000000 | 0.275164 | 0.556277 | 0.454681 | 0.440910 | 0.615210 | 0.563545 | 0.456788 | 0.183443 | 0.210298 | 0.198018 | 0.221253 | 0.268346 | -0.007257 | -0.042238 | -0.011307 |
| URXMEP | 0.008772 | 0.072135 | 0.025856 | 0.002181 | 0.006686 | 0.009579 | -0.002662 | 0.037279 | 0.004544 | 0.004545 | 0.004204 | 0.022656 | -0.006683 | 0.021110 | 0.086494 | 0.047163 | 0.066441 | 0.041176 | 0.035889 | -0.002710 | 0.367465 | 0.200237 | 0.113358 | 0.223052 | 0.176011 | 0.352957 | 0.320985 | 0.236778 | 0.112946 | 0.409682 | 0.456371 | 0.275164 | 1.000000 | 0.414057 | 0.323891 | 0.354573 | 0.114608 | 0.413991 | 0.373875 | 0.157758 | 0.200942 | 0.165065 | 0.199519 | 0.222287 | -0.033298 | -0.043929 | 0.014999 |
| URXMHH | -0.071952 | 0.023753 | 0.025792 | 0.061417 | 0.051327 | 0.022351 | 0.062336 | 0.062876 | -0.004699 | -0.004660 | -0.020392 | 0.005668 | -0.021957 | 0.036763 | 0.110962 | 0.080600 | 0.077031 | 0.053880 | 0.033574 | -0.000166 | 0.481790 | 0.194528 | 0.139980 | 0.217083 | 0.117050 | 0.238077 | 0.203812 | 0.435868 | 0.313209 | 0.954633 | 0.631331 | 0.556277 | 0.414057 | 1.000000 | 0.815825 | 0.462766 | 0.301488 | 0.987170 | 0.557243 | 0.190354 | 0.177058 | 0.213512 | 0.241441 | 0.294033 | -0.019916 | -0.045528 | 0.002565 |
| URXMHP | -0.057099 | 0.011157 | 0.028521 | 0.045509 | 0.030530 | 0.013380 | 0.048650 | 0.031826 | -0.012199 | -0.012172 | -0.003666 | -0.005111 | -0.000913 | 0.056583 | 0.059949 | 0.038000 | 0.044171 | 0.032293 | 0.023473 | 0.013113 | 0.400325 | 0.167909 | 0.112130 | 0.180310 | 0.138134 | 0.213889 | 0.203874 | 0.359436 | 0.282293 | 0.785646 | 0.501003 | 0.454681 | 0.323891 | 0.815825 | 1.000000 | 0.398512 | 0.402269 | 0.811381 | 0.427362 | 0.175129 | 0.186194 | 0.227031 | 0.240318 | 0.259142 | -0.016418 | -0.031536 | -0.007709 |
| URXMIB | -0.078938 | 0.043477 | 0.025864 | -0.021754 | -0.023047 | 0.001655 | -0.014188 | 0.079954 | 0.007175 | 0.007154 | -0.018351 | -0.036558 | -0.027209 | 0.051446 | -0.003407 | -0.037051 | -0.042825 | -0.032771 | -0.058854 | -0.069707 | 0.425609 | 0.105607 | 0.149969 | 0.074325 | 0.154251 | 0.261624 | 0.219969 | 0.335661 | 0.368948 | 0.427790 | 0.704682 | 0.440910 | 0.354573 | 0.462766 | 0.398512 | 1.000000 | 0.222114 | 0.476698 | 0.580313 | 0.178442 | 0.282471 | 0.225757 | 0.255908 | 0.302743 | -0.039326 | -0.039532 | -0.007790 |
| URXMNP | -0.037882 | 0.000334 | 0.023444 | 0.037501 | 0.016704 | 0.004944 | 0.040118 | 0.040170 | -0.002584 | -0.002558 | -0.017925 | -0.044696 | -0.008042 | 0.051441 | -0.000381 | -0.019794 | -0.016515 | 0.016587 | -0.015399 | -0.029655 | 0.185482 | 0.079227 | 0.101400 | 0.043175 | 0.099813 | 0.103442 | 0.098657 | 0.435917 | 0.669951 | 0.308156 | 0.221166 | 0.615210 | 0.114608 | 0.301488 | 0.402269 | 0.222114 | 1.000000 | 0.295867 | 0.176750 | 0.064845 | 0.103344 | 0.095345 | 0.095047 | 0.118141 | -0.011444 | -0.022203 | -0.008673 |
| URXMOH | -0.084542 | 0.020893 | 0.019869 | 0.054698 | 0.048178 | 0.022366 | 0.059006 | 0.062996 | -0.010806 | -0.010765 | -0.024708 | -0.005132 | -0.026370 | 0.036990 | 0.109072 | 0.079758 | 0.070293 | 0.045418 | 0.025236 | -0.006340 | 0.490261 | 0.195391 | 0.145904 | 0.214734 | 0.110185 | 0.239427 | 0.206300 | 0.441603 | 0.324965 | 0.959086 | 0.642819 | 0.563545 | 0.413991 | 0.987170 | 0.811381 | 0.476698 | 0.295867 | 1.000000 | 0.573781 | 0.188118 | 0.180969 | 0.209293 | 0.239041 | 0.294115 | -0.023223 | -0.046657 | -0.003231 |
| URXMZP | -0.127855 | 0.018343 | 0.040268 | 0.012892 | 0.014769 | 0.014281 | 0.017889 | 0.063261 | -0.041545 | -0.041558 | -0.021559 | -0.033446 | -0.064066 | 0.031205 | 0.078877 | 0.014725 | -0.005844 | 0.004436 | -0.040253 | -0.066336 | 0.480276 | 0.115123 | 0.115187 | 0.104504 | 0.091529 | 0.211237 | 0.194011 | 0.327628 | 0.257313 | 0.522091 | 0.713835 | 0.456788 | 0.373875 | 0.557243 | 0.427362 | 0.580313 | 0.176750 | 0.573781 | 1.000000 | 0.185719 | 0.234520 | 0.247408 | 0.273666 | 0.304638 | -0.053932 | -0.053804 | -0.027208 |
| URXP01 | 0.089793 | 0.000241 | 0.087382 | 0.029931 | 0.014994 | 0.011504 | 0.007935 | -0.047075 | 0.017032 | 0.017038 | 0.283455 | 0.168714 | 0.003810 | 0.092305 | 0.016038 | 0.018443 | -0.007564 | 0.014354 | -0.005773 | -0.007065 | 0.208404 | 0.019495 | -0.003789 | 0.031885 | 0.089756 | 0.077430 | 0.041557 | 0.161853 | 0.080142 | 0.176638 | 0.228500 | 0.183443 | 0.157758 | 0.190354 | 0.175129 | 0.178442 | 0.064845 | 0.188118 | 0.185719 | 1.000000 | 0.580734 | 0.739686 | 0.722313 | 0.600033 | -0.037827 | -0.077841 | 0.021351 |
| URXP02 | 0.011790 | 0.096730 | 0.103168 | -0.011944 | -0.020790 | 0.000098 | -0.015958 | 0.045784 | 0.034159 | 0.034114 | 0.170835 | 0.008869 | -0.072821 | 0.105401 | -0.017730 | -0.013376 | -0.024268 | -0.000354 | -0.033340 | -0.043761 | 0.233228 | 0.028982 | 0.040930 | 0.020255 | 0.094462 | 0.131640 | 0.109976 | 0.178888 | 0.159030 | 0.171112 | 0.279663 | 0.210298 | 0.200942 | 0.177058 | 0.186194 | 0.282471 | 0.103344 | 0.180969 | 0.234520 | 0.580734 | 1.000000 | 0.654846 | 0.683528 | 0.567045 | -0.075100 | -0.068848 | 0.023527 |
| URXP03 | 0.020125 | 0.000286 | 0.126131 | 0.034726 | 0.011470 | 0.009283 | 0.014238 | -0.041164 | -0.006506 | -0.006522 | 0.287831 | 0.121121 | -0.043994 | 0.098307 | 0.001807 | 0.008766 | -0.013178 | 0.010638 | -0.018106 | -0.018591 | 0.246652 | 0.014788 | -0.017669 | 0.008582 | 0.085148 | 0.072077 | 0.047167 | 0.166450 | 0.102285 | 0.184258 | 0.268746 | 0.198018 | 0.165065 | 0.213512 | 0.227031 | 0.225757 | 0.095345 | 0.209293 | 0.247408 | 0.739686 | 0.654846 | 1.000000 | 0.957671 | 0.739636 | -0.076111 | -0.091917 | -0.005565 |
| URXP04 | 0.034065 | 0.045689 | 0.118798 | 0.028441 | 0.009867 | 0.009833 | 0.009136 | -0.006053 | 0.011443 | 0.011431 | 0.265966 | 0.118584 | -0.040516 | 0.112249 | -0.002726 | 0.000830 | -0.011752 | 0.007438 | -0.018499 | -0.019319 | 0.276966 | 0.032948 | 0.008097 | 0.034143 | 0.093562 | 0.094411 | 0.069413 | 0.188201 | 0.116859 | 0.219289 | 0.306498 | 0.221253 | 0.199519 | 0.241441 | 0.240318 | 0.255908 | 0.095047 | 0.239041 | 0.273666 | 0.722313 | 0.683528 | 0.957671 | 1.000000 | 0.803507 | -0.071718 | -0.091878 | 0.008342 |
| URXP06 | 0.024007 | 0.057947 | 0.079881 | 0.016261 | 0.005457 | 0.010220 | 0.003620 | 0.017406 | 0.003245 | 0.003207 | 0.109110 | 0.074794 | -0.017936 | 0.144847 | -0.006888 | -0.001308 | -0.021259 | 0.000595 | -0.022704 | -0.023169 | 0.292380 | 0.088000 | 0.096956 | 0.083219 | 0.123945 | 0.123903 | 0.106926 | 0.239195 | 0.166852 | 0.274339 | 0.349468 | 0.268346 | 0.222287 | 0.294033 | 0.259142 | 0.302743 | 0.118141 | 0.294115 | 0.304638 | 0.600033 | 0.567045 | 0.739636 | 0.803507 | 1.000000 | -0.044658 | -0.075691 | 0.005294 |
| DR1TFIBE | 0.110284 | -0.003748 | -0.015875 | 0.534127 | 0.550612 | 0.118924 | 0.418043 | -0.030956 | 0.032780 | 0.032748 | -0.047857 | 0.040778 | 0.073392 | -0.005508 | -0.022200 | -0.025001 | -0.012025 | -0.002336 | 0.001055 | -0.000267 | -0.053165 | 0.013946 | 0.015527 | -0.013645 | -0.012617 | -0.020207 | -0.034523 | -0.011106 | -0.004938 | -0.012043 | -0.058330 | -0.007257 | -0.033298 | -0.019916 | -0.016418 | -0.039326 | -0.011444 | -0.023223 | -0.053932 | -0.037827 | -0.075100 | -0.076111 | -0.071718 | -0.044658 | 1.000000 | 0.098028 | 0.012433 |
| DR1_320Z | 0.021556 | 0.024218 | -0.043760 | -0.043463 | -0.064822 | -0.048236 | -0.011403 | -0.027378 | 0.008299 | 0.008312 | -0.081805 | -0.053055 | 0.081617 | 0.006512 | -0.037555 | -0.025179 | -0.027074 | -0.023900 | -0.004412 | 0.008976 | -0.071083 | 0.025922 | 0.048153 | 0.012961 | 0.012675 | 0.004155 | 0.007054 | -0.024189 | -0.008356 | -0.039747 | -0.056617 | -0.042238 | -0.043929 | -0.045528 | -0.031536 | -0.039532 | -0.022203 | -0.046657 | -0.053804 | -0.077841 | -0.068848 | -0.091917 | -0.091878 | -0.075691 | 0.098028 | 1.000000 | 0.001123 |
| diabetes | 0.231925 | 0.144196 | 0.020119 | -0.027442 | -0.030970 | -0.011102 | -0.023162 | 0.124081 | 0.652247 | 0.652336 | 0.068836 | 0.081720 | 0.029791 | 0.018170 | -0.005594 | -0.016019 | 0.006191 | 0.005793 | 0.022107 | 0.016343 | -0.018315 | -0.004257 | -0.033809 | -0.003387 | -0.026673 | -0.014588 | -0.025858 | 0.003673 | -0.000488 | 0.005039 | -0.004023 | -0.011307 | 0.014999 | 0.002565 | -0.007709 | -0.007790 | -0.008673 | -0.003231 | -0.027208 | 0.021351 | 0.023527 | -0.005565 | 0.008342 | 0.005294 | 0.012433 | 0.001123 | 1.000000 |
# chi square tests
# Chi-square test of independence between each categorical predictor and the diabetes
# target, computed on the training split. Results are collected per predictor and
# printed as a table (one row per predictor).
chisq_results = {}
# Exclude the target from the predictors. Rebinding (instead of list.remove) leaves the
# same resulting list but stays safe when this cell is re-run: remove() raises
# ValueError once "diabetes" is already gone.
categorical_nhanes = [name for name in categorical_nhanes if name != "diabetes"]
for col in categorical_nhanes:
    # Create a contingency table
    contingency_table = pd.crosstab(train_data[col], train_data["diabetes"])
    # Perform the Chi-Square test (the expected-frequency table is not used here)
    chi2, p, dof, _expected = chi2_contingency(contingency_table)
    # Store results
    chisq_results[col] = {"Chi2": chi2, "p-value": p, "Degrees of Freedom": dof}
# Convert results to a DataFrame for better visualization
chisq_results_df = pd.DataFrame(chisq_results).T
print(chisq_results_df)
Chi2 p-value Degrees of Freedom RIAGENDR 7.464367 6.293216e-03 1.0 RIDRETH1 2.141709 1.433421e-01 1.0 DMDEDUC2 172.867247 1.749690e-39 1.0 INDHHIN2 8.582897 3.393351e-03 1.0 DPQ010 8.269928 4.030711e-03 1.0 PAQ710 3.186036 7.426990e-02 1.0 HUQ051 4.112754 4.256100e-02 1.0 HOQ065 4.539456 3.312211e-02 1.0 SMQ020 56.818977 4.778330e-14 1.0 MCQ300C 177.497618 1.705346e-40 1.0 HIQ011 7.793265 5.244133e-03 1.0 SLQ050 3.312152 6.876937e-02 1.0
# pairplots among top 10 variables most correlated with diabetes
# Rank every variable by the absolute value of its correlation with diabetes, strongest first.
abs_corr_with_diabetes = corr["diabetes"].abs()
diq_corr = abs_corr_with_diabetes.sort_values(ascending=False)
# Print 11 rows: the first is diabetes itself (correlation 1.0), followed by the top 10.
print(diq_corr.head(11))
# Hand-picked from the ranking printed above.
num_corr_diabetes = [
    "LBDGLUSI",
    "LBXGLU",
    "RIDAGEYR",
    "LBXIN",
    "BMXBMI",
    "LBXBPB",
    "LBXTHG",
    "LBXBCD",
    "URXBP3",
    "DR1TCARB",
]
# Only rows with no missing value in any of the selected columns are plotted.
nhanes_numeric_nonan = train_data[num_corr_diabetes].dropna()
scatter_style = {'alpha': 0.5, 's': 5}
sns.pairplot(nhanes_numeric_nonan, diag_kind="hist", plot_kws=scatter_style)
plt.show()
diabetes 1.000000 LBDGLUSI 0.652336 LBXGLU 0.652247 RIDAGEYR 0.231925 BMXBMI 0.144196 LBXIN 0.124081 LBXBPB 0.081720 LBXBCD 0.068836 URXBP3 0.033809 DR1TCARB 0.030970 LBXTHG 0.029791 Name: diabetes, dtype: float64
# top 18 variables least correlated with diabetes (corr value under 0.01)
# Same absolute-correlation ranking as above, but weakest first (sort_values is ascending by default).
abs_corr_to_target = corr["diabetes"].abs()
diq_corr_asc = abs_corr_to_target.sort_values()
print(diq_corr_asc.iloc[:18])
URXCOP 0.000488 DR1_320Z 0.001123 URXMHH 0.002565 URXMOH 0.003231 URXBUP 0.003387 URXCNP 0.003673 URXMBP 0.004023 URXTRS 0.004257 URXECP 0.005039 URXP06 0.005294 URXP03 0.005565 LBXMPAH 0.005594 LBXPFHS 0.005793 LBXPFNA 0.006191 URXMHP 0.007709 URXMIB 0.007790 URXP04 0.008342 URXMNP 0.008673 Name: diabetes, dtype: float64
# heatmaps
# One-row heatmap of the chi-square p-values: a single-column frame of floats,
# transposed so that each categorical predictor becomes one cell along the x-axis.
p_values = chisq_results_df[["p-value"]].astype(float)
plt.figure(figsize=(8, 6))
sns.heatmap(
    p_values.T,
    annot=True,
    cmap="coolwarm",
    linewidths=0.5,
    fmt=".3f",
)
plt.title("Chi-Square Test p-values (Association with Diabetes)")
plt.show()
# drop variables (correlation value <0.01 with diabetes OR p-value >0.05 in chi-square test with diabetes)
# The list is defined once so the training and test splits always lose exactly the same columns.
weak_predictor_columns = [
    "DR1_320Z", "LBXMPAH", "LBXPFNA", "LBXPFHS", "URXP03", "URXP04", "URXP06", "URXTRS",
    "URXBUP", "URXCNP", "URXCOP", "URXECP", "URXMBP", "URXMHH", "URXMHP", "URXMIB",
    "URXMNP", "URXMOH", "RIDRETH1", "PAQ710", "SLQ050",
]
train_data = train_data.drop(columns=weak_predictor_columns)
test_data = test_data.drop(columns=weak_predictor_columns)
Discussion for Exploratory Data Analysis and Bivariate Analysis¶
LBDGLUSI and LBXGLU, DR1TCARB and DR1TKCAL, DR1TTFAT and DR1TKCAL, DR1TCARB and DR1TSUGR, URXECP and URXMOH, and URXMOH and URXMHH are all highly correlated. The direction of each correlation makes sense, as the variables in each pair measure similar things that overlap with one another. In terms of correlations with diabetes, the predictors that are highly correlated with the target variable are LBDGLUSI at 0.652336, LBXGLU at 0.652247, and LBXIN at 0.124081. The variables I would remove because of how weak they are include SLQ050, DR1TFIBE, DR1_320Z, URXP01, URXMNP, URXCNP, URXEPB, URXBUP, LBXPFUA, LBXPFHS, and LBXPFNA, because their correlation values with the target variable are less than 0.01.
Exporting Train and Test Data¶
# Write the full modelling splits to CSV (row index not written).
train_data.to_csv('/Users/kevinnguyen/Downloads/nhanes_training_data.csv', index=False)
test_data.to_csv('/Users/kevinnguyen/Downloads/nhanes_testing_data.csv', index=False)
# "og" variants: the same splits minus the columns listed here, exported alongside the full ones.
og_excluded_columns = ['DPQ010', 'MCQ300C', 'HUQ051', 'HIQ011', 'HOQ065', 'DR1TFIBE']
train_data_og = train_data.drop(columns=og_excluded_columns)
test_data_og = test_data.drop(columns=og_excluded_columns)
train_data_og.to_csv('/Users/kevinnguyen/Downloads/nhanes_training_data_og.csv', index=False)
test_data_og.to_csv('/Users/kevinnguyen/Downloads/nhanes_testing_data_og.csv', index=False)
EDA Figures and Tables for Final Modeling Dataset¶
# Column groups for the final modelling dataset, i.e. after the weak predictors were dropped.
categorical_nhanes = ["RIAGENDR", "DMDEDUC2", "INDHHIN2", "DPQ010", "HUQ051", "HOQ065", "SMQ020",
                      "diabetes", "MCQ300C", "HIQ011"]
continuous_nhanes = ["RIDAGEYR", "BMXBMI", "ALQ130", "DR1TKCAL", "DR1TCARB", "DR1TSUGR", "DR1TTFAT", "LBXIN", "LBXGLU",
                     "LBDGLUSI", "LBXBCD", "LBXBPB", "LBXTHG", "URXUAS", "LBXPFDO", "LBXPFDE", "LBXPFUA", "URXBPH",
                     "URXBP3", "URXEPB", "URXMPB", "URXPPB", "URXMC1",
                     "URXMEP", "URXMZP", "URXP01", "URXP02", "DR1TFIBE"]
# Correlation matrix of the remaining continuous predictors plus the diabetes target.
# .copy() makes numerical_nhanes an independent frame, so adding the "diabetes" column
# below is not a write onto a slice of train_data (the SettingWithCopyWarning pattern).
numerical_nhanes = train_data[continuous_nhanes].copy()
# errors="coerce": any diabetes value that cannot be parsed as a number becomes NaN.
numerical_nhanes["diabetes"] = pd.to_numeric(train_data["diabetes"], errors="coerce")
# Keep only the first occurrence of any repeated column name before correlating.
corr = numerical_nhanes.loc[:, ~numerical_nhanes.columns.duplicated()].corr()
# Last expression of the cell: the colour-graded table is what the notebook displays.
# (The former bare `corr.head(100)` line was discarded without effect and is removed.)
corr.style.background_gradient(cmap='coolwarm')
|   | RIDAGEYR | BMXBMI | ALQ130 | DR1TKCAL | DR1TCARB | DR1TSUGR | DR1TTFAT | LBXIN | LBXGLU | LBDGLUSI | LBXBCD | LBXBPB | LBXTHG | URXUAS | LBXPFDO | LBXPFDE | LBXPFUA | URXBPH | URXBP3 | URXEPB | URXMPB | URXPPB | URXMC1 | URXMEP | URXMZP | URXP01 | URXP02 | DR1TFIBE | diabetes |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| RIDAGEYR | 1.000000 | 0.276835 | -0.049195 | -0.022535 | -0.063251 | -0.025177 | -0.027491 | -0.177180 | 0.275097 | 0.275131 | 0.359195 | 0.415399 | 0.280666 | 0.078363 | -0.000235 | 0.104914 | 0.114822 | -0.052036 | -0.007265 | 0.050070 | 0.017795 | 0.002019 | -0.052441 | 0.008772 | -0.127855 | 0.089793 | 0.011790 | 0.110284 | 0.231925 |
| BMXBMI | 0.276835 | 1.000000 | 0.070655 | -0.010689 | -0.039629 | -0.002268 | 0.004131 | 0.465490 | 0.226896 | 0.226870 | 0.024432 | 0.015792 | 0.031764 | 0.016020 | -0.028180 | -0.034604 | -0.050581 | 0.017345 | -0.011590 | -0.015703 | 0.013729 | 0.004688 | 0.022802 | 0.072135 | 0.018343 | 0.000241 | 0.096730 | -0.003748 | 0.144196 |
| ALQ130 | -0.049195 | 0.070655 | 1.000000 | 0.122885 | 0.070473 | 0.010235 | 0.062827 | 0.007606 | 0.056448 | 0.056438 | 0.097814 | 0.095320 | -0.041142 | 0.020626 | -0.013071 | -0.025146 | -0.034601 | 0.054806 | -0.047695 | 0.015435 | -0.022472 | -0.033499 | 0.031569 | 0.025856 | 0.040268 | 0.087382 | 0.103168 | -0.015875 | 0.020119 |
| DR1TKCAL | -0.022535 | -0.010689 | 0.122885 | 1.000000 | 0.877693 | 0.258928 | 0.872380 | 0.000511 | 0.007269 | 0.007290 | -0.004915 | 0.034032 | 0.002365 | 0.011704 | 0.002527 | 0.010015 | -0.020095 | 0.010087 | -0.018234 | -0.014865 | -0.039840 | -0.055292 | 0.051504 | 0.002181 | 0.012892 | 0.029931 | -0.011944 | 0.534127 | -0.027442 |
| DR1TCARB | -0.063251 | -0.039629 | 0.070473 | 0.877693 | 1.000000 | 0.450036 | 0.633181 | 0.029471 | -0.000078 | -0.000066 | -0.033193 | 0.001565 | -0.044464 | -0.015182 | 0.001369 | -0.013819 | -0.035910 | 0.011760 | -0.025200 | -0.038929 | -0.037678 | -0.052249 | 0.040538 | 0.006686 | 0.014769 | 0.014994 | -0.020790 | 0.550612 | -0.030970 |
| DR1TSUGR | -0.025177 | -0.002268 | 0.010235 | 0.258928 | 0.450036 | 1.000000 | 0.175195 | 0.025237 | 0.008435 | 0.008442 | -0.021060 | -0.013685 | -0.025431 | -0.010501 | 0.006104 | -0.006975 | -0.014593 | 0.014659 | -0.007062 | -0.014748 | -0.002526 | -0.007367 | 0.017884 | 0.009579 | 0.014281 | 0.011504 | 0.000098 | 0.118924 | -0.011102 |
| DR1TTFAT | -0.027491 | 0.004131 | 0.062827 | 0.872380 | 0.633181 | 0.175195 | 1.000000 | 0.019355 | 0.007106 | 0.007119 | -0.035198 | -0.010152 | -0.013558 | -0.006299 | 0.000724 | 0.001049 | -0.035104 | 0.007298 | -0.006594 | -0.018610 | -0.023084 | -0.031622 | 0.054428 | -0.002662 | 0.017889 | 0.007935 | -0.015958 | 0.418043 | -0.023162 |
| LBXIN | -0.177180 | 0.465490 | 0.007606 | 0.000511 | 0.029471 | 0.025237 | 0.019355 | 1.000000 | 0.211596 | 0.211561 | -0.151057 | -0.138398 | -0.112682 | -0.021270 | -0.053862 | -0.086738 | -0.106293 | 0.019975 | -0.034506 | -0.063172 | -0.006174 | -0.010344 | 0.052259 | 0.037279 | 0.063261 | -0.047075 | 0.045784 | -0.030956 | 0.124081 |
| LBXGLU | 0.275097 | 0.226896 | 0.056448 | 0.007269 | -0.000078 | 0.008435 | 0.007106 | 0.211596 | 1.000000 | 0.999994 | 0.050378 | 0.144172 | 0.033099 | 0.024445 | -0.006374 | 0.024183 | 0.002555 | -0.036025 | -0.068459 | -0.060605 | -0.046914 | -0.066700 | -0.004157 | 0.004544 | -0.041545 | 0.017032 | 0.034159 | 0.032780 | 0.652247 |
| LBDGLUSI | 0.275131 | 0.226870 | 0.056438 | 0.007290 | -0.000066 | 0.008442 | 0.007119 | 0.211561 | 0.999994 | 1.000000 | 0.050424 | 0.144230 | 0.033159 | 0.024522 | -0.006326 | 0.024244 | 0.002633 | -0.036008 | -0.068446 | -0.060634 | -0.046903 | -0.066726 | -0.004097 | 0.004545 | -0.041558 | 0.017038 | 0.034114 | 0.032748 | 0.652336 |
| LBXBCD | 0.359195 | 0.024432 | 0.097814 | -0.004915 | -0.033193 | -0.021060 | -0.035198 | -0.151057 | 0.050378 | 0.050424 | 1.000000 | 0.385347 | 0.127550 | 0.056038 | 0.016960 | 0.035651 | 0.059691 | -0.004681 | -0.035683 | 0.051736 | 0.003359 | 0.000491 | -0.018317 | 0.004204 | -0.021559 | 0.283455 | 0.170835 | -0.047857 | 0.068836 |
| LBXBPB | 0.415399 | 0.015792 | 0.095320 | 0.034032 | 0.001565 | -0.013685 | -0.010152 | -0.138398 | 0.144172 | 0.144230 | 0.385347 | 1.000000 | 0.199550 | 0.107696 | 0.092868 | 0.099836 | 0.102385 | -0.001403 | -0.079913 | -0.009271 | -0.045575 | -0.073508 | -0.031711 | 0.022656 | -0.033446 | 0.168714 | 0.008869 | 0.040778 | 0.081720 |
| LBXTHG | 0.280666 | 0.031764 | -0.041142 | 0.002365 | -0.044464 | -0.025431 | -0.013558 | -0.112682 | 0.033099 | 0.033159 | 0.127550 | 0.199550 | 1.000000 | 0.300720 | 0.042309 | 0.151698 | 0.228828 | -0.040945 | 0.056446 | 0.059693 | 0.024701 | 0.013835 | -0.025321 | -0.006683 | -0.064066 | 0.003810 | -0.072821 | 0.073392 | 0.029791 |
| URXUAS | 0.078363 | 0.016020 | 0.020626 | 0.011704 | -0.015182 | -0.010501 | -0.006299 | -0.021270 | 0.024445 | 0.024522 | 0.056038 | 0.107696 | 0.300720 | 1.000000 | 0.031487 | 0.072972 | 0.098317 | 0.036320 | 0.028407 | 0.040041 | 0.040130 | 0.024067 | 0.055757 | 0.021110 | 0.031205 | 0.092305 | 0.105401 | -0.005508 | 0.018170 |
| LBXPFDO | -0.000235 | -0.028180 | -0.013071 | 0.002527 | 0.001369 | 0.006104 | 0.000724 | -0.053862 | -0.006374 | -0.006326 | 0.016960 | 0.092868 | 0.042309 | 0.031487 | 1.000000 | 0.366735 | 0.361501 | 0.025458 | -0.015623 | -0.011895 | 0.029106 | 0.018965 | 0.001521 | 0.047163 | 0.014725 | 0.018443 | -0.013376 | -0.025001 | -0.016019 |
| LBXPFDE | 0.104914 | -0.034604 | -0.025146 | 0.010015 | -0.013819 | -0.006975 | 0.001049 | -0.086738 | 0.024183 | 0.024244 | 0.035651 | 0.099836 | 0.151698 | 0.072972 | 0.366735 | 1.000000 | 0.754365 | -0.007550 | 0.007670 | 0.016857 | 0.027077 | 0.002849 | -0.007652 | 0.035889 | -0.040253 | -0.005773 | -0.033340 | 0.001055 | 0.022107 |
| LBXPFUA | 0.114822 | -0.050581 | -0.034601 | -0.020095 | -0.035910 | -0.014593 | -0.035104 | -0.106293 | 0.002555 | 0.002633 | 0.059691 | 0.102385 | 0.228828 | 0.098317 | 0.361501 | 0.754365 | 1.000000 | -0.041593 | 0.000575 | 0.033351 | 0.036968 | 0.011258 | -0.030603 | -0.002710 | -0.066336 | -0.007065 | -0.043761 | -0.000267 | 0.016343 |
| URXBPH | -0.052036 | 0.017345 | 0.054806 | 0.010087 | 0.011760 | 0.014659 | 0.007298 | 0.019975 | -0.036025 | -0.036008 | -0.004681 | -0.001403 | -0.040945 | 0.036320 | 0.025458 | -0.007550 | -0.041593 | 1.000000 | 0.124354 | 0.140161 | 0.227003 | 0.198849 | 0.409479 | 0.367465 | 0.480276 | 0.208404 | 0.233228 | -0.053165 | -0.018315 |
| URXBP3 | -0.007265 | -0.011590 | -0.047695 | -0.018234 | -0.025200 | -0.007062 | -0.006594 | -0.034506 | -0.068459 | -0.068446 | -0.035683 | -0.079913 | 0.056446 | 0.028407 | -0.015623 | 0.007670 | 0.000575 | 0.124354 | 1.000000 | 0.259654 | 0.277838 | 0.298543 | 0.191800 | 0.113358 | 0.115187 | -0.003789 | 0.040930 | 0.015527 | -0.033809 |
| URXEPB | 0.050070 | -0.015703 | 0.015435 | -0.014865 | -0.038929 | -0.014748 | -0.018610 | -0.063172 | -0.060605 | -0.060634 | 0.051736 | -0.009271 | 0.059693 | 0.040041 | -0.011895 | 0.016857 | 0.033351 | 0.140161 | 0.259654 | 1.000000 | 0.487131 | 0.481681 | 0.130442 | 0.176011 | 0.091529 | 0.089756 | 0.094462 | -0.012617 | -0.026673 |
| URXMPB | 0.017795 | 0.013729 | -0.022472 | -0.039840 | -0.037678 | -0.002526 | -0.023084 | -0.006174 | -0.046914 | -0.046903 | 0.003359 | -0.045575 | 0.024701 | 0.040130 | 0.029106 | 0.027077 | 0.036968 | 0.227003 | 0.277838 | 0.487131 | 1.000000 | 0.827147 | 0.193652 | 0.352957 | 0.211237 | 0.077430 | 0.131640 | -0.020207 | -0.014588 |
| URXPPB | 0.002019 | 0.004688 | -0.033499 | -0.055292 | -0.052249 | -0.007367 | -0.031622 | -0.010344 | -0.066700 | -0.066726 | 0.000491 | -0.073508 | 0.013835 | 0.024067 | 0.018965 | 0.002849 | 0.011258 | 0.198849 | 0.298543 | 0.481681 | 0.827147 | 1.000000 | 0.166348 | 0.320985 | 0.194011 | 0.041557 | 0.109976 | -0.034523 | -0.025858 |
| URXMC1 | -0.052441 | 0.022802 | 0.031569 | 0.051504 | 0.040538 | 0.017884 | 0.054428 | 0.052259 | -0.004157 | -0.004097 | -0.018317 | -0.031711 | -0.025321 | 0.055757 | 0.001521 | -0.007652 | -0.030603 | 0.409479 | 0.191800 | 0.130442 | 0.193652 | 0.166348 | 1.000000 | 0.275164 | 0.456788 | 0.183443 | 0.210298 | -0.007257 | -0.011307 |
| URXMEP | 0.008772 | 0.072135 | 0.025856 | 0.002181 | 0.006686 | 0.009579 | -0.002662 | 0.037279 | 0.004544 | 0.004545 | 0.004204 | 0.022656 | -0.006683 | 0.021110 | 0.047163 | 0.035889 | -0.002710 | 0.367465 | 0.113358 | 0.176011 | 0.352957 | 0.320985 | 0.275164 | 1.000000 | 0.373875 | 0.157758 | 0.200942 | -0.033298 | 0.014999 |
| URXMZP | -0.127855 | 0.018343 | 0.040268 | 0.012892 | 0.014769 | 0.014281 | 0.017889 | 0.063261 | -0.041545 | -0.041558 | -0.021559 | -0.033446 | -0.064066 | 0.031205 | 0.014725 | -0.040253 | -0.066336 | 0.480276 | 0.115187 | 0.091529 | 0.211237 | 0.194011 | 0.456788 | 0.373875 | 1.000000 | 0.185719 | 0.234520 | -0.053932 | -0.027208 |
| URXP01 | 0.089793 | 0.000241 | 0.087382 | 0.029931 | 0.014994 | 0.011504 | 0.007935 | -0.047075 | 0.017032 | 0.017038 | 0.283455 | 0.168714 | 0.003810 | 0.092305 | 0.018443 | -0.005773 | -0.007065 | 0.208404 | -0.003789 | 0.089756 | 0.077430 | 0.041557 | 0.183443 | 0.157758 | 0.185719 | 1.000000 | 0.580734 | -0.037827 | 0.021351 |
| URXP02 | 0.011790 | 0.096730 | 0.103168 | -0.011944 | -0.020790 | 0.000098 | -0.015958 | 0.045784 | 0.034159 | 0.034114 | 0.170835 | 0.008869 | -0.072821 | 0.105401 | -0.013376 | -0.033340 | -0.043761 | 0.233228 | 0.040930 | 0.094462 | 0.131640 | 0.109976 | 0.210298 | 0.200942 | 0.234520 | 0.580734 | 1.000000 | -0.075100 | 0.023527 |
| DR1TFIBE | 0.110284 | -0.003748 | -0.015875 | 0.534127 | 0.550612 | 0.118924 | 0.418043 | -0.030956 | 0.032780 | 0.032748 | -0.047857 | 0.040778 | 0.073392 | -0.005508 | -0.025001 | 0.001055 | -0.000267 | -0.053165 | 0.015527 | -0.012617 | -0.020207 | -0.034523 | -0.007257 | -0.033298 | -0.053932 | -0.037827 | -0.075100 | 1.000000 | 0.012433 |
| diabetes | 0.231925 | 0.144196 | 0.020119 | -0.027442 | -0.030970 | -0.011102 | -0.023162 | 0.124081 | 0.652247 | 0.652336 | 0.068836 | 0.081720 | 0.029791 | 0.018170 | -0.016019 | 0.022107 | 0.016343 | -0.018315 | -0.033809 | -0.026673 | -0.014588 | -0.025858 | -0.011307 | 0.014999 | -0.027208 | 0.021351 | 0.023527 | 0.012433 | 1.000000 |
# Chi-square test of independence between each categorical predictor and the
# diabetes target, collected into one results table.
chisq_results = {}
# The target must not be tested against itself. The membership check keeps the
# cell re-runnable: an unconditional remove() raises ValueError once
# "diabetes" has already been removed from the list.
if "diabetes" in categorical_nhanes:
    categorical_nhanes.remove("diabetes")
for col in categorical_nhanes:
    # Create a contingency table
    contingency_table = pd.crosstab(train_data[col], train_data["diabetes"])
    # Perform the Chi-Square test
    chi2, p, dof, expected = chi2_contingency(contingency_table)
    # Store results
    chisq_results[col] = {"Chi2": chi2, "p-value": p, "Degrees of Freedom": dof}
# Convert results to a DataFrame for better visualization
# (transposed so each predictor is a row and each statistic a column).
chisq_results_df = pd.DataFrame(chisq_results).T
print(chisq_results_df)
Chi2 p-value Degrees of Freedom RIAGENDR 7.464367 6.293216e-03 1.0 DMDEDUC2 172.867247 1.749690e-39 1.0 INDHHIN2 8.582897 3.393351e-03 1.0 DPQ010 8.269928 4.030711e-03 1.0 HUQ051 4.112754 4.256100e-02 1.0 HOQ065 4.539456 3.312211e-02 1.0 SMQ020 56.818977 4.778330e-14 1.0 MCQ300C 177.497618 1.705346e-40 1.0 HIQ011 7.793265 5.244133e-03 1.0
# Rank every column of the correlation matrix by the magnitude of its
# correlation with diabetes and print the top 11 entries.
diq_corr = corr["diabetes"].abs().sort_values(ascending=False)
print(diq_corr.head(11))

# Columns shown in the pair plot.
num_corr_diabetes = [
    "LBDGLUSI",
    "LBXGLU",
    "RIDAGEYR",
    "LBXIN",
    "BMXBMI",
    "LBXBPB",
    "LBXTHG",
    "LBXBCD",
    "URXBP3",
    "DR1TCARB",
]
# Rows with a missing value in any of the plotted columns are dropped.
nhanes_numeric_nonan = train_data[num_corr_diabetes].dropna()
sns.pairplot(
    nhanes_numeric_nonan,
    diag_kind="hist",
    plot_kws={'alpha': 0.5, 's': 5},
)
plt.show()
diabetes 1.000000 LBDGLUSI 0.652336 LBXGLU 0.652247 RIDAGEYR 0.231925 BMXBMI 0.144196 LBXIN 0.124081 LBXBPB 0.081720 LBXBCD 0.068836 URXBP3 0.033809 DR1TCARB 0.030970 LBXTHG 0.029791 Name: diabetes, dtype: float64
# Single-column frame of chi-square p-values, cast to float for plotting.
p_values = chisq_results_df[["p-value"]].astype(float)

# Transposed so the predictors run along the x-axis of a one-row heatmap.
plt.figure(figsize=(8, 6))
sns.heatmap(
    p_values.transpose(),
    annot=True,
    cmap="coolwarm",
    linewidths=0.5,
    fmt=".3f",
)
plt.title("Chi-Square Test p-values (Association with Diabetes)")
plt.show()